From 18c0eb6e1f1ab90c9a0f7c39c56618996e1a982f Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 5 Nov 2024 07:37:21 +1100 Subject: [PATCH] Updated vkd3d to 794f4c30f4873841aaa0c5c9745ee732437e70db. --- libs/vkd3d/libs/vkd3d-shader/fx.c | 2 +- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 265 +++++++++--- libs/vkd3d/libs/vkd3d-shader/ir.c | 397 +++++++++++++++--- libs/vkd3d/libs/vkd3d-shader/tpf.c | 92 +++- .../libs/vkd3d-shader/vkd3d_shader_main.c | 3 + .../libs/vkd3d-shader/vkd3d_shader_private.h | 5 + 6 files changed, 637 insertions(+), 127 deletions(-) diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index 8954feb22b7..5382dd94f98 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -3295,7 +3295,7 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int parse_fx_print_indent(parser); vkd3d_string_buffer_printf(&parser->buffer, "}"); - if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER && gs_so.sodecl) + if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl) { vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", fx_4_get_string(parser, gs_so.sodecl)); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index ce431ee6815..213e403dcbd 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -6579,7 +6579,43 @@ static void generate_vsir_signature(struct hlsl_ctx *ctx, } } -static uint32_t sm1_generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) +static enum vkd3d_data_type vsir_data_type_from_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) +{ + if (hlsl_version_lt(ctx, 4, 0)) + return VKD3D_DATA_FLOAT; + + if (type->class == HLSL_CLASS_ARRAY) + return vsir_data_type_from_hlsl_type(ctx, type->e.array.type); + if (type->class == HLSL_CLASS_STRUCT) + return VKD3D_DATA_MIXED; + if (type->class <= HLSL_CLASS_LAST_NUMERIC) + { + switch (type->e.numeric.type) + { + case HLSL_TYPE_DOUBLE: + return VKD3D_DATA_DOUBLE; + case HLSL_TYPE_FLOAT: + return VKD3D_DATA_FLOAT; + case HLSL_TYPE_HALF: + return VKD3D_DATA_HALF; + case HLSL_TYPE_INT: + return VKD3D_DATA_INT; + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + return VKD3D_DATA_UINT; + } + } + + vkd3d_unreachable(); +} + +static enum vkd3d_data_type vsir_data_type_from_hlsl_instruction(struct hlsl_ctx *ctx, + const struct hlsl_ir_node *instr) +{ + return vsir_data_type_from_hlsl_type(ctx, instr->data_type); +} + +static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) { uint32_t swizzle; @@ -6737,6 +6773,50 @@ static struct vkd3d_shader_instruction *generate_vsir_add_program_instruction( return ins; } +static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src, + struct hlsl_ctx *ctx, const struct hlsl_constant_value *value, + enum vkd3d_data_type type, unsigned int width, unsigned int map_writemask) +{ + unsigned int i, j; + + vsir_src_param_init(src, VKD3DSPR_IMMCONST, type, 0); + if (width == 1) + { + src->reg.u.immconst_u32[0] = value->u[0].u; + return; + } + + src->reg.dimension = VSIR_DIMENSION_VEC4; + for (i = 0, j = 0; i < 4; ++i) + { + if ((map_writemask & (1u << i)) && (j < width)) + src->reg.u.immconst_u32[i] = value->u[j++].u; + else + src->reg.u.immconst_u32[i] = 0; + } +} + +static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, + struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, uint32_t map_writemask) +{ + struct hlsl_ir_constant *constant; + + if (hlsl_version_ge(ctx, 4, 0) && instr->type == HLSL_IR_CONSTANT) + { + /* In SM4 constants are inlined */ + constant = hlsl_ir_constant(instr); + vsir_src_from_hlsl_constant_value(src, ctx, &constant->value, + vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->dimx, map_writemask); + } + else + { + vsir_register_init(&src->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); + src->reg.idx[0].offset = instr->reg.id; + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = generate_vsir_get_src_swizzle(instr->reg.writemask, map_writemask); + } +} + static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_constant *constant) { @@ -6754,7 +6834,7 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, src_param = &ins->src[0]; vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); src_param->reg.idx[0].offset = constant->reg.id; - src_param->swizzle = sm1_generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); + src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); dst_param = &ins->dst[0]; vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); @@ -6763,9 +6843,9 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, } /* Translate ops that can be mapped to a single vsir instruction with only one dst register. */ -static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, struct vsir_program *program, - struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, uint32_t src_mod, uint32_t dst_mod, - bool map_src_swizzles) +static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, + uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles) { struct hlsl_ir_node *instr = &expr->node; struct vkd3d_shader_dst_param *dst_param; @@ -6786,8 +6866,9 @@ static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, s return; dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->reg.dimension = VSIR_DIMENSION_VEC4; dst_param->write_mask = instr->reg.writemask; dst_param->modifiers = dst_mod; @@ -6796,9 +6877,7 @@ static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, s struct hlsl_ir_node *operand = expr->operands[i].node; src_param = &ins->src[i]; - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = operand->reg.id; - src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, + vsir_src_from_hlsl_node(src_param, ctx, operand, map_src_swizzles ? dst_param->write_mask : VKD3DSP_WRITEMASK_ALL); src_param->modifiers = src_mod; } @@ -6820,7 +6899,7 @@ static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx VKD3D_ASSERT(instr->reg.allocated); VKD3D_ASSERT(operand); - src_swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask); + src_swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask); for (i = 0; i < 4; ++i) { if (instr->reg.writemask & (1u << i)) @@ -6866,7 +6945,7 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi src_param = &ins->src[0]; vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); src_param->reg.idx[0].offset = operand->reg.id; - src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); + src_param->swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); if (ctx->profile->major_version < 3) { @@ -6908,13 +6987,13 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, /* Integrals are internally represented as floats, so no change is necessary.*/ case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); return true; case HLSL_TYPE_DOUBLE: if (ctx->double_as_float_alias) { - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); return true; } hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, @@ -6939,7 +7018,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); return true; case HLSL_TYPE_BOOL: @@ -6961,7 +7040,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, case HLSL_TYPE_FLOAT: if (ctx->double_as_float_alias) { - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); return true; } hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, @@ -7001,7 +7080,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr switch (expr->op) { case HLSL_OP1_ABS: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true); break; case HLSL_OP1_CAST: @@ -7013,11 +7092,11 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break; case HLSL_OP1_DSX: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); break; case HLSL_OP1_DSY: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); break; case HLSL_OP1_EXP2: @@ -7029,7 +7108,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break; case HLSL_OP1_NEG: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); break; case HLSL_OP1_RCP: @@ -7037,7 +7116,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break; case HLSL_OP1_REINTERPRET: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); break; case HLSL_OP1_RSQ: @@ -7045,7 +7124,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break; case HLSL_OP1_SAT: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); break; case HLSL_OP1_SIN_REDUCED: @@ -7054,18 +7133,18 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break; case HLSL_OP2_ADD: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); break; case HLSL_OP2_DOT: switch (expr->operands[0].node->data_type->dimx) { case 3: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); break; case 4: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); break; default: @@ -7075,43 +7154,43 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break; case HLSL_OP2_MAX: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); break; case HLSL_OP2_MIN: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); break; case HLSL_OP2_MUL: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); break; case HLSL_OP1_FRACT: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); break; case HLSL_OP2_LOGIC_AND: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); break; case HLSL_OP2_LOGIC_OR: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); break; case HLSL_OP2_SLT: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true); break; case HLSL_OP3_CMP: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true); break; case HLSL_OP3_DP2ADD: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false); break; case HLSL_OP3_MAD: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); break; default: @@ -7227,7 +7306,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1); src_param->reg.idx[0].offset = register_index; - src_param->swizzle = sm1_generate_vsir_get_src_swizzle(writemask, dst_writemask); + src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); if (deref->rel_offset.node) hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir."); @@ -7306,9 +7385,7 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, dst_param->write_mask = instr->reg.writemask; src_param = &ins->src[0]; - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = coords->reg.id; - src_param->swizzle = sm1_generate_vsir_get_src_swizzle(coords->reg.writemask, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL); sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource, VKD3DSP_WRITEMASK_ALL, &ins->location); @@ -7316,19 +7393,15 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) { src_param = &ins->src[2]; - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = ddx->reg.id; - src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddx->reg.writemask, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(src_param, ctx, ddx, VKD3DSP_WRITEMASK_ALL); src_param = &ins->src[3]; - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = ddy->reg.id; - src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddy->reg.writemask, VKD3DSP_WRITEMASK_ALL); + vsir_src_from_hlsl_node(src_param, ctx, ddy, VKD3DSP_WRITEMASK_ALL); } } -static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_program *program, - struct hlsl_ir_swizzle *swizzle_instr) +static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr) { struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node; struct vkd3d_shader_dst_param *dst_param; @@ -7342,8 +7415,9 @@ static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_pr return; dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->reg.dimension = VSIR_DIMENSION_VEC4; dst_param->write_mask = instr->reg.writemask; swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); @@ -7352,8 +7426,10 @@ static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_pr swizzle = vsir_swizzle_from_hlsl(swizzle); src_param = &ins->src[0]; - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT); + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, val), 1); src_param->reg.idx[0].offset = val->reg.id; + src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param->swizzle = swizzle; } @@ -7371,9 +7447,7 @@ static void sm1_generate_vsir_instr_store(struct hlsl_ctx *ctx, struct vsir_prog sm1_generate_vsir_init_dst_param_from_deref(ctx, &ins->dst[0], &store->lhs, &ins->location, store->writemask); src_param = &ins->src[0]; - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = rhs->reg.id; - src_param->swizzle = sm1_generate_vsir_get_src_swizzle(rhs->reg.writemask, ins->dst[0].write_mask); + vsir_src_from_hlsl_node(src_param, ctx, rhs, ins->dst[0].write_mask); } static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, @@ -7408,7 +7482,6 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program struct vkd3d_shader_src_param *src_param; struct hlsl_ir_node *instr = &iff->node; struct vkd3d_shader_instruction *ins; - uint32_t swizzle; if (hlsl_version_lt(ctx, 2, 1)) { @@ -7421,19 +7494,12 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program return; ins->flags = VKD3D_SHADER_REL_OP_NE; - swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask); - swizzle = vsir_swizzle_from_hlsl(swizzle); - src_param = &ins->src[0]; - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = condition->reg.id; - src_param->swizzle = swizzle; + vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL); src_param->modifiers = 0; src_param = &ins->src[1]; - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = condition->reg.id; - src_param->swizzle = swizzle; + vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL); src_param->modifiers = VKD3DSPSM_NEG; sm1_generate_vsir_block(ctx, &iff->then_block, program); @@ -7496,7 +7562,7 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo break; case HLSL_IR_SWIZZLE: - sm1_generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); + generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); break; default: @@ -7557,6 +7623,25 @@ static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_progr hlsl_block_add_instr(block, vsir_instr); } +static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_node *instr) +{ + struct vkd3d_shader_location *loc; + struct hlsl_ir_node *vsir_instr; + + loc = &program->instructions.elements[program->instructions.count - 1].location; + + if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, + program->instructions.count - 1, instr->data_type, &instr->reg, loc))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + list_add_before(&instr->entry, &vsir_instr->entry); + hlsl_replace_node(instr, vsir_instr); +} + static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc) { @@ -7589,6 +7674,60 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, add_last_vsir_instr_to_block(ctx, program, block); } +static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_expr *expr) +{ + switch (expr->op) + { + case HLSL_OP1_ABS: + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true); + return true; + + default: + return false; + } +} + +static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) +{ + struct hlsl_ir_node *instr, *next; + + LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->data_type) + { + if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) + { + hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); + break; + } + } + + switch (instr->type) + { + case HLSL_IR_CALL: + vkd3d_unreachable(); + + case HLSL_IR_CONSTANT: + /* In SM4 all constants are inlined. */ + break; + + case HLSL_IR_EXPR: + if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr))) + replace_instr_with_last_vsir_instr(ctx, program, instr); + break; + + case HLSL_IR_SWIZZLE: + generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); + replace_instr_with_last_vsir_instr(ctx, program, instr); + break; + + default: + break; + } + } +} + static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) { @@ -7631,6 +7770,8 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, list_move_head(&func->body.instrs, &block.instrs); hlsl_block_cleanup(&block); + + sm4_generate_vsir_block(ctx, &func->body, program); } /* OBJECTIVE: Translate all the information from ctx and entry_func to the diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index 19dc36d9191..9b50a308e11 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -129,6 +129,38 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( return NULL; } +static struct signature_element *vsir_signature_find_element_by_name( + const struct shader_signature *signature, const char *semantic_name, unsigned int semantic_index) +{ + for (unsigned int i = 0; i < signature->element_count; ++i) + { + if (!ascii_strcasecmp(signature->elements[i].semantic_name, semantic_name) + && signature->elements[i].semantic_index == semantic_index) + return &signature->elements[i]; + } + + return NULL; +} + +static bool vsir_signature_find_sysval(const struct shader_signature *signature, + enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index) +{ + const struct signature_element *e; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + e = &signature->elements[i]; + if (e->sysval_semantic == sysval && e->semantic_index == semantic_index) + { + *element_index = i; + return true; + } + } + + return false; +} + void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, enum vkd3d_data_type data_type, unsigned int idx_count) { @@ -277,6 +309,15 @@ static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigne dst->reg.idx[0].offset = idx; } +static void dst_param_init_output(struct vkd3d_shader_dst_param *dst, + enum vkd3d_data_type data_type, uint32_t idx, uint32_t write_mask) +{ + vsir_dst_param_init(dst, VKD3DSPR_OUTPUT, data_type, 1); + dst->reg.idx[0].offset = idx; + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->write_mask = write_mask; +} + void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode) { @@ -789,6 +830,98 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i shader_register_eliminate_phase_addressing(&ins->dst[i].reg, instance_id); } +/* Ensure that the program closes with a ret. sm1 programs do not, by default. + * Many of our IR passes rely on this in order to insert instructions at the + * end of execution. */ +static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + static const struct vkd3d_shader_location no_loc; + if (program->instructions.count + && program->instructions.elements[program->instructions.count - 1].opcode == VKD3DSIH_RET) + return VKD3D_OK; + + if (!shader_instruction_array_insert_at(&program->instructions, program->instructions.count, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + vsir_instruction_init(&program->instructions.elements[program->instructions.count - 1], &no_loc, VKD3DSIH_RET); + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct shader_signature *signature = &program->output_signature; + struct signature_element *new_elements, *e; + + if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) + return VKD3D_OK; + + if ((e = vsir_signature_find_element_by_name(signature, "COLOR", 0))) + { + program->diffuse_written_mask = e->mask; + e->mask = VKD3DSP_WRITEMASK_ALL; + + return VKD3D_OK; + } + + if (!(new_elements = vkd3d_realloc(signature->elements, + (signature->element_count + 1) * sizeof(*signature->elements)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + signature->elements = new_elements; + e = &signature->elements[signature->element_count++]; + memset(e, 0, sizeof(*e)); + e->semantic_name = vkd3d_strdup("COLOR"); + e->sysval_semantic = VKD3D_SHADER_SV_NONE; + e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + e->register_count = 1; + e->mask = VKD3DSP_WRITEMASK_ALL; + e->used_mask = VKD3DSP_WRITEMASK_ALL; + e->register_index = SM1_COLOR_REGISTER_OFFSET; + e->target_location = SM1_COLOR_REGISTER_OFFSET; + e->interpolation_mode = VKD3DSIM_NONE; + + return VKD3D_OK; +} + +/* Uninitialized components of diffuse yield 1.0 in SM1-2. Implement this by + * always writing diffuse in those versions, even if the PS doesn't read it. */ +static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + static const struct vkd3d_shader_location no_loc; + struct vkd3d_shader_instruction *ins; + unsigned int i; + + if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX + || program->diffuse_written_mask == VKD3DSP_WRITEMASK_ALL) + return VKD3D_OK; + + /* Write the instruction after all LABEL, DCL, and NOP instructions. + * We need to skip NOP instructions because they might result from removed + * DCLs, and there could still be DCLs after NOPs. */ + for (i = 0; i < program->instructions.count; ++i) + { + ins = &program->instructions.elements[i]; + + if (!vsir_instruction_is_dcl(ins) && ins->opcode != VKD3DSIH_LABEL && ins->opcode != VKD3DSIH_NOP) + break; + } + + if (!shader_instruction_array_insert_at(&program->instructions, i, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins = &program->instructions.elements[i]; + vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = 0; + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL & ~program->diffuse_written_mask; + vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + for (i = 0; i < 4; ++i) + ins->src[0].reg.u.immconst_f32[i] = 1.0f; + return VKD3D_OK; +} + static const struct vkd3d_shader_varying_map *find_varying_map( const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx) { @@ -803,14 +936,88 @@ static const struct vkd3d_shader_varying_map *find_varying_map( return NULL; } +static bool target_allows_subset_masks(const struct vkd3d_shader_compile_info *info) +{ + const struct vkd3d_shader_spirv_target_info *spirv_info; + enum vkd3d_shader_spirv_environment environment; + + switch (info->target_type) + { + case VKD3D_SHADER_TARGET_SPIRV_BINARY: + spirv_info = vkd3d_find_struct(info->next, SPIRV_TARGET_INFO); + environment = spirv_info ? spirv_info->environment : VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; + + switch (environment) + { + case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5: + return true; + + case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: + case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: + /* FIXME: Allow KHR_maintenance4. */ + return false; + + default: + FIXME("Unrecognized environment %#x.\n", environment); + return false; + } + + default: + return true; + } +} + +static void remove_unread_output_components(const struct shader_signature *signature, + struct vkd3d_shader_instruction *ins, struct vkd3d_shader_dst_param *dst) +{ + const struct signature_element *e; + + switch (dst->reg.type) + { + case VKD3DSPR_OUTPUT: + e = vsir_signature_find_element_for_reg(signature, dst->reg.idx[0].offset, 0); + break; + + case VKD3DSPR_ATTROUT: + e = vsir_signature_find_element_for_reg(signature, + SM1_COLOR_REGISTER_OFFSET + dst->reg.idx[0].offset, 0); + break; + + case VKD3DSPR_RASTOUT: + e = vsir_signature_find_element_for_reg(signature, + SM1_RASTOUT_REGISTER_OFFSET + dst->reg.idx[0].offset, 0); + break; + + default: + return; + } + + /* We already changed the mask earlier. */ + dst->write_mask &= e->mask; + + if (!dst->write_mask) + { + if (ins->dst_count == 1) + vkd3d_shader_instruction_make_nop(ins); + else + vsir_dst_param_init(dst, VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); + } +} + static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, struct vsir_transformation_context *ctx) { const struct vkd3d_shader_location location = {.source_name = ctx->compile_info->source_name}; struct vkd3d_shader_message_context *message_context = ctx->message_context; const struct vkd3d_shader_compile_info *compile_info = ctx->compile_info; + bool allows_subset_masks = target_allows_subset_masks(compile_info); struct shader_signature *signature = &program->output_signature; + unsigned int orig_element_count = signature->element_count; const struct vkd3d_shader_varying_map_info *varying_map; + struct signature_element *new_elements, *e; + unsigned int uninit_varying_count = 0; + unsigned int subset_varying_count = 0; + unsigned int new_register_count = 0; unsigned int i; if (!(varying_map = vkd3d_find_struct(compile_info->next, VARYING_MAP_INFO))) @@ -819,22 +1026,29 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program for (i = 0; i < signature->element_count; ++i) { const struct vkd3d_shader_varying_map *map = find_varying_map(varying_map, i); - struct signature_element *e = &signature->elements[i]; + e = &signature->elements[i]; if (map) { unsigned int input_mask = map->input_mask; e->target_location = map->input_register_index; - /* It is illegal in Vulkan if the next shader uses the same varying - * location with a different mask. */ - if (input_mask && input_mask != e->mask) + if ((input_mask & e->mask) == input_mask) + { + ++subset_varying_count; + if (!allows_subset_masks) + { + e->mask = input_mask; + e->used_mask &= input_mask; + } + } + else if (input_mask && input_mask != e->mask) { vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to not yet implemented feature: " - "Output mask %#x does not match input mask %#x.", - e->mask, input_mask); + "Input mask %#x reads components not written in output mask %#x.", + input_mask, e->mask); return VKD3D_ERROR_NOT_IMPLEMENTED; } } @@ -842,17 +1056,103 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program { e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; } + + new_register_count = max(new_register_count, e->register_index + 1); } + /* Handle uninitialized varyings by writing them before every ret. + * + * As far as sm1-sm3 is concerned, drivers disagree on what uninitialized + * varyings contain. + * + * - Diffuse (COLOR0) reliably contains (1, 1, 1, 1) in SM1/2. + * In SM3 it may contain (0, 0, 0, 0), (0, 0, 0, 1), or (1, 1, 1, 1). + * + * - Specular (COLOR1) contains (0, 0, 0, 0) or (0, 0, 0, 1). + * WARP writes (1, 1, 1, 1). + * + * - Anything else contains (0, 0, 0, 0) or (0, 0, 0, 1). + * + * We don't have enough knowledge to identify diffuse here. Instead we deal + * with that in vsir_program_ensure_diffuse(), by always writing diffuse if + * the shader doesn't. + */ + for (i = 0; i < varying_map->varying_count; ++i) { if (varying_map->varying_map[i].output_signature_index >= signature->element_count) + ++uninit_varying_count; + } + + if (!(new_elements = vkd3d_realloc(signature->elements, + (signature->element_count + uninit_varying_count) * sizeof(*signature->elements)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + signature->elements = new_elements; + + for (i = 0; i < varying_map->varying_count; ++i) + { + const struct vkd3d_shader_varying_map *map = &varying_map->varying_map[i]; + + if (map->output_signature_index < orig_element_count) + continue; + + TRACE("Synthesizing zero value for uninitialized output %u (mask %u).\n", + map->input_register_index, map->input_mask); + e = &signature->elements[signature->element_count++]; + memset(e, 0, sizeof(*e)); + e->sysval_semantic = VKD3D_SHADER_SV_NONE; + e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + e->register_count = 1; + e->mask = map->input_mask; + e->used_mask = map->input_mask; + e->register_index = new_register_count++; + e->target_location = map->input_register_index; + e->interpolation_mode = VKD3DSIM_LINEAR; + } + + /* Write each uninitialized varying before each ret. */ + for (i = 0; i < program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + struct vkd3d_shader_location loc; + + if (ins->opcode != VKD3DSIH_RET) + continue; + + loc = ins->location; + if (!shader_instruction_array_insert_at(&program->instructions, i, uninit_varying_count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + ins = &program->instructions.elements[i]; + + for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j) { - vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " - "The next stage consumes varyings not written by this stage."); - return VKD3D_ERROR_NOT_IMPLEMENTED; + e = &signature->elements[j]; + + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); + dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, e->register_index, e->mask); + vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ++ins; } + + i += uninit_varying_count; + } + + /* Vulkan (without KHR_maintenance4) disallows any mismatching masks, + * including when the input mask is a proper subset of the output mask. + * Resolve this by rewriting the shader to remove unread components from + * any writes to the output variable. */ + + if (!subset_varying_count || allows_subset_masks) + return VKD3D_OK; + + for (i = 0; i < program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; + + for (unsigned int j = 0; j < ins->dst_count; ++j) + remove_unread_output_components(signature, ins, &ins->dst[j]); } return VKD3D_OK; @@ -2490,15 +2790,14 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener, struct vkd3d_shader_message_context *message_context) { - bool main_block_open, is_hull_shader, after_declarations_section; struct vkd3d_shader_instruction_array *instructions; struct vsir_program *program = flattener->program; + bool is_hull_shader, after_declarations_section; struct vkd3d_shader_instruction *dst_ins; size_t i; instructions = &program->instructions; is_hull_shader = program->shader_version.type == VKD3D_SHADER_TYPE_HULL; - main_block_open = !is_hull_shader; after_declarations_section = is_hull_shader; if (!cf_flattener_require_space(flattener, instructions->count + 1)) @@ -2822,8 +3121,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte if (cf_info) cf_info->inside_block = false; - else - main_block_open = false; break; default: @@ -2833,14 +3130,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte } } - if (main_block_open) - { - if (!(dst_ins = cf_flattener_require_space(flattener, 1))) - return VKD3D_ERROR_OUT_OF_MEMORY; - vsir_instruction_init(dst_ins, &flattener->location, VKD3DSIH_RET); - ++flattener->instruction_count; - } - return flattener->status; } @@ -5564,21 +5853,6 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru return VKD3D_OK; } -static bool find_colour_signature_idx(const struct shader_signature *signature, uint32_t *index) -{ - for (unsigned int i = 0; i < signature->element_count; ++i) - { - if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_TARGET - && !signature->elements[i].register_index) - { - *index = i; - return true; - } - } - - return false; -} - static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program, const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func, const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, @@ -5690,7 +5964,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) return VKD3D_OK; - if (!find_colour_signature_idx(&program->output_signature, &colour_signature_idx) + if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx) || !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3)) return VKD3D_OK; @@ -5808,21 +6082,6 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog return VKD3D_OK; } -static bool find_sysval_signature_idx(const struct shader_signature *signature, - enum vkd3d_shader_sysval_semantic sysval, uint32_t *idx) -{ - for (unsigned int i = 0; i < signature->element_count; ++i) - { - if (signature->elements[i].sysval_semantic == sysval) - { - *idx = i; - return true; - } - } - - return false; -} - static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *program, struct vsir_transformation_context *ctx) { @@ -5878,7 +6137,7 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr } } - if (!find_sysval_signature_idx(signature, VKD3D_SHADER_SV_POSITION, &position_signature_idx)) + if (!vsir_signature_find_sysval(signature, VKD3D_SHADER_SV_POSITION, 0, &position_signature_idx)) { vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, "Shader does not write position."); @@ -7988,6 +8247,31 @@ static void vsir_transform_( } } +/* Transformations which should happen at parse time, i.e. before scan + * information is returned to the user. + * + * In particular, some passes need to modify the signature, and + * vkd3d_shader_scan() should report the modified signature for the given + * target. */ +enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + struct vsir_transformation_context ctx = + { + .result = VKD3D_OK, + .program = program, + .config_flags = config_flags, + .compile_info = compile_info, + .message_context = message_context, + }; + + /* For vsir_program_ensure_diffuse(). */ + if (program->shader_version.major <= 2) + vsir_transform(&ctx, vsir_program_add_diffuse_output); + + return ctx.result; +} + enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) { @@ -8012,6 +8296,11 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t } else { + vsir_transform(&ctx, vsir_program_ensure_ret); + + if (program->shader_version.major <= 2) + vsir_transform(&ctx, vsir_program_ensure_diffuse); + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) vsir_transform(&ctx, vsir_program_remap_output_signature); diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index befe5eacf9c..bbd2f761d29 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -674,6 +674,7 @@ struct sm4_index_range_array struct vkd3d_sm4_lookup_tables { const struct vkd3d_sm4_opcode_info *opcode_info_from_sm4[VKD3D_SM4_OP_COUNT]; + const struct vkd3d_sm4_opcode_info *opcode_info_from_vsir[VKD3DSIH_COUNT]; const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; const struct vkd3d_sm4_stat_field_info *stat_field_from_sm4[VKD3D_SM4_OP_COUNT]; @@ -1412,6 +1413,8 @@ struct tpf_compiler struct vkd3d_sm4_lookup_tables lookup; struct sm4_stat *stat; + int result; + struct vkd3d_bytecode_buffer *buffer; struct dxbc_writer dxbc; }; @@ -1903,6 +1906,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) const struct vkd3d_sm4_opcode_info *info = &opcode_table[i]; lookup->opcode_info_from_sm4[info->opcode] = info; + lookup->opcode_info_from_vsir[info->handler_idx] = info; } for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) @@ -1929,6 +1933,24 @@ static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode( return lookup->opcode_info_from_sm4[sm4_opcode]; } +static const struct vkd3d_sm4_opcode_info *get_info_from_vsir_opcode( + const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_opcode vsir_opcode) +{ + if (vsir_opcode >= VKD3DSIH_COUNT) + return NULL; + return lookup->opcode_info_from_vsir[vsir_opcode]; +} + +static unsigned int opcode_info_get_dst_count(const struct vkd3d_sm4_opcode_info *info) +{ + return strnlen(info->dst_info, SM4_MAX_DST_COUNT); +} + +static unsigned int opcode_info_get_src_count(const struct vkd3d_sm4_opcode_info *info) +{ + return strnlen(info->src_info, SM4_MAX_SRC_COUNT); +} + static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type) { @@ -2651,8 +2673,8 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str ins->raw = false; ins->structured = false; ins->predicate = NULL; - ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); - ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); + ins->dst_count = opcode_info_get_dst_count(opcode_info); + ins->src_count = opcode_info_get_src_count(opcode_info); ins->src = src_params = vsir_program_get_src_params(program, ins->src_count); if (!src_params && ins->src_count) { @@ -2971,7 +2993,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con return VKD3D_OK; } -static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block); +static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); static bool type_is_integer(const struct hlsl_type *type) { @@ -6094,11 +6116,12 @@ static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_ hlsl_release_string_buffer(tpf->ctx, dst_type_string); } -static void write_sm4_if(const struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) +static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) { struct sm4_instruction instr = { - .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, + .opcode = VKD3D_SM4_OP_IF, + .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, .src_count = 1, }; @@ -6210,7 +6233,7 @@ static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_ write_sm4_instruction(tpf, &instr); } -static void write_sm4_loop(const struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) +static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) { struct sm4_instruction instr = { @@ -6394,7 +6417,7 @@ static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir write_sm4_instruction(tpf, &instr); } -static void write_sm4_switch(const struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) +static void write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) { const struct hlsl_ir_node *selector = s->selector.node; struct hlsl_ir_switch_case *c; @@ -6455,7 +6478,46 @@ static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ write_sm4_instruction(tpf, &instr); } -static void tpf_handle_instruction(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) +static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) +{ + const struct vkd3d_sm4_opcode_info *info; + struct sm4_instruction instr = {0}; + unsigned int dst_count, src_count; + + info = get_info_from_vsir_opcode(&tpf->lookup, ins->opcode); + VKD3D_ASSERT(info); + + dst_count = opcode_info_get_dst_count(info); + src_count = opcode_info_get_src_count(info); + + if (ins->dst_count != dst_count) + { + ERR("Invalid destination count %u for vsir instruction %#x (expected %u).\n", + ins->dst_count, ins->opcode, dst_count); + tpf->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + if (ins->src_count != src_count) + { + ERR("Invalid source count %u for vsir instruction %#x (expected %u).\n", + ins->src_count, ins->opcode, src_count); + tpf->result = VKD3D_ERROR_INVALID_SHADER; + return; + } + + instr.opcode = info->opcode; + instr.dst_count = ins->dst_count; + instr.src_count = ins->src_count; + + for (unsigned int i = 0; i < ins->dst_count; ++i) + instr.dsts[i] = ins->dst[i]; + for (unsigned int i = 0; i < ins->src_count; ++i) + instr.srcs[i] = ins->src[i]; + + write_sm4_instruction(tpf, &instr); +} + +static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) { switch (ins->opcode) { @@ -6467,13 +6529,17 @@ static void tpf_handle_instruction(const struct tpf_compiler *tpf, const struct tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp); break; + case VKD3DSIH_MOV: + tpf_simple_instruction(tpf, ins); + break; + default: vkd3d_unreachable(); break; } } -static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block) +static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block) { const struct hlsl_ir_node *instr; unsigned int vsir_instr_idx; @@ -6765,7 +6831,13 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, tpf_write_sfi0(&tpf); tpf_write_stat(&tpf); - if (!(ret = ctx->result)) + ret = VKD3D_OK; + if (ctx->result) + ret = ctx->result; + if (tpf.result) + ret = tpf.result; + + if (!ret) ret = dxbc_writer_write(&tpf.dxbc, out); for (i = 0; i < tpf.dxbc.section_count; ++i) vkd3d_shader_free_shader_code(&tpf.dxbc.sections[i].data); diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index 3355e18b88e..3afac9a38a4 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -719,8 +719,11 @@ static enum vkd3d_result vsir_parse(const struct vkd3d_shader_compile_info *comp vsir_program_trace(program); vsir_program_cleanup(program); + return ret; } + if (compile_info->target_type != VKD3D_SHADER_TARGET_NONE) + ret = vsir_program_transform_early(program, config_flags, compile_info, message_context); return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 5ae938e0525..1b6c37343d1 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -585,6 +585,8 @@ enum vkd3d_shader_opcode VKD3DSIH_XOR, VKD3DSIH_INVALID, + + VKD3DSIH_COUNT, }; enum vkd3d_shader_register_type @@ -1424,6 +1426,7 @@ struct vsir_program bool use_vocp; bool has_point_size; bool has_point_coord; + uint8_t diffuse_written_mask; enum vsir_control_flow_type cf_type; enum vsir_normalisation_level normalisation_level; @@ -1442,6 +1445,8 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c enum vsir_normalisation_level normalisation_level); enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); +enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, const char *source_name, struct vkd3d_shader_message_context *message_context); struct vkd3d_shader_src_param *vsir_program_create_outpointid_param( -- 2.45.2