From 5db2c2a949a7ef820ac48d269305b9510c47dd48 Mon Sep 17 00:00:00 2001 From: Francisco Casas Date: Fri, 31 May 2024 16:06:11 -0400 Subject: [PATCH] vkd3d-shader/hlsl: Save DOT hlsl_ir_exprs in the vsir_program for SM1. --- libs/vkd3d-shader/d3dbc.c | 42 ++----------------------- libs/vkd3d-shader/hlsl_codegen.c | 53 ++++++++++++++++++++++---------- 2 files changed, 38 insertions(+), 57 deletions(-) diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index 5532ee20..862c8014 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -1987,30 +1987,6 @@ static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_r d3dbc_write_instruction(d3dbc, &instr); } -static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, enum vkd3d_sm1_opcode opcode, - const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) -{ - struct sm1_instruction instr = - { - .opcode = opcode, - - .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - - .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, - .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - - d3dbc_write_instruction(d3dbc, &instr); -} - static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, enum vkd3d_sm1_opcode opcode, const struct hlsl_reg *dst, const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) @@ -2340,6 +2316,8 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str case VKD3DSIH_ABS: case VKD3DSIH_ADD: case VKD3DSIH_CMP: + case VKD3DSIH_DP3: + case VKD3DSIH_DP4: case VKD3DSIH_DSX: case VKD3DSIH_DSY: case VKD3DSIH_FRC: @@ -2517,22 +2495,6 @@ static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_ d3dbc_write_sincos(d3dbc, expr->op, &instr->reg, &arg1->reg); break; - case HLSL_OP2_DOT: - switch (arg1->data_type->dimx) - { - case 4: - d3dbc_write_dot(d3dbc, VKD3D_SM1_OP_DP4, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case 3: - d3dbc_write_dot(d3dbc, VKD3D_SM1_OP_DP3, &instr->reg, &arg1->reg, &arg2->reg); - break; - - default: - vkd3d_unreachable(); - } - break; - case HLSL_OP3_DP2ADD: d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); break; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 5cf35ecc..35351910 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -6508,7 +6508,8 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, /* Translate ops that can be mapped to a single vsir instruction with only one dst register. */ static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, struct vsir_program *program, - struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, uint32_t src_mod, uint32_t dst_mod) + struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, uint32_t src_mod, uint32_t dst_mod, + bool map_src_swizzles) { struct vkd3d_shader_instruction_array *instructions = &program->instructions; struct hlsl_ir_node *instr = &expr->node; @@ -6543,7 +6544,8 @@ static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, s src_param = &ins->src[i]; vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); src_param->reg.idx[0].offset = operand->reg.id; - src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, dst_param->write_mask); + src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, + map_src_swizzles ? dst_param->write_mask : VKD3DSP_WRITEMASK_ALL); src_param->modifiers = src_mod; } @@ -6636,15 +6638,15 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr switch (expr->op) { case HLSL_OP1_ABS: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true); break; case HLSL_OP1_DSX: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); break; case HLSL_OP1_DSY: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); break; case HLSL_OP1_EXP2: @@ -6656,7 +6658,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break; case HLSL_OP1_NEG: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); break; case HLSL_OP1_RCP: @@ -6668,47 +6670,64 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr break; case HLSL_OP1_SAT: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); break; case HLSL_OP2_ADD: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); + break; + + case HLSL_OP2_DOT: + switch (expr->operands[0].node->data_type->dimx) + { + case 3: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); + break; + + case 4: + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); + break; + + default: + vkd3d_unreachable(); + return false; + } break; case HLSL_OP2_MAX: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); break; case HLSL_OP2_MIN: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); break; case HLSL_OP2_MUL: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); break; case HLSL_OP1_FRACT: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); break; case HLSL_OP2_LOGIC_AND: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); break; case HLSL_OP2_LOGIC_OR: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); break; case HLSL_OP2_SLT: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true); break; case HLSL_OP3_CMP: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true); break; case HLSL_OP3_MAD: - sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0); + sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); break; default: