From e6e82ad3f6c41d99462a671fe1d3dfba5db519bf Mon Sep 17 00:00:00 2001 From: Francisco Casas Date: Tue, 23 Jul 2024 17:12:53 -0400 Subject: [PATCH] vkd3d-shader/hlsl: Add missing src1 and src2 constants to sincos on SM2. The sincos instruction expects two specific constants on 2.0 and 2.1 profiles. Consider the following shader: uniform float u; float4 main() : sv_target { return sin(u); } On native, with ps_2_0, this compiles as: ps_2_0 def c3, 0.159154937, 0.5, 6.28318548, -3.14159274 def c1, -1.55009923e-006, -2.17013894e-005, 0.00260416674, 0.00026041668 def c2, -0.020833334, -0.125, 1, 0.5 mov r0.xy, c3 mad r0.x, c0.x, r0.x, r0.y frc r0.x, r0.x mad r0.x, r0.x, c3.z, c3.w sincos r1.y, r0.x, c1, c2 mov r0, r1.y mov oC0, r0 Before this patch, we were not emitting the src1 and src2 constant arguments. --- libs/vkd3d-shader/d3dbc.c | 30 +++++++++++++++++++++++- libs/vkd3d-shader/hlsl.h | 2 ++ libs/vkd3d-shader/hlsl_codegen.c | 39 ++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+), 1 deletion(-) diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c index 3bf8368b..0791c843 100644 --- a/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d-shader/d3dbc.c @@ -2374,12 +2374,40 @@ static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, static void d3dbc_write_sincos(struct d3dbc_compiler *d3dbc, enum hlsl_ir_expr_op op, const struct hlsl_reg *dst, const struct hlsl_reg *src) { + struct sm1_instruction instr = + { + .opcode = D3DSIO_SINCOS, + + .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), + .srcs[0].reg = src->id, + .src_count = 1, + }; + if (op == HLSL_OP1_COS_REDUCED) VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_0); else /* HLSL_OP1_SIN_REDUCED */ VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_1); - d3dbc_write_unary_op(d3dbc, D3DSIO_SINCOS, dst, src, 0, 0); + if 
(d3dbc->ctx->profile->major_version < 3) + { + instr.src_count = 3; + + instr.srcs[1].type = VKD3DSPR_CONST; + instr.srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); + instr.srcs[1].reg = d3dbc->ctx->d3dsincosconst1.id; + + instr.srcs[2].type = VKD3DSPR_CONST; + instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); + instr.srcs[2].reg = d3dbc->ctx->d3dsincosconst2.id; + } + + d3dbc_write_instruction(d3dbc, &instr); } static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h index d08dd4f2..ca4db6c7 100644 --- a/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d-shader/hlsl.h @@ -1001,6 +1001,8 @@ struct hlsl_ctx } *regs; size_t count, size; } constant_defs; + /* 'c' registers where the constants expected by SM2 sincos are stored. */ + struct hlsl_reg d3dsincosconst1, d3dsincosconst2; /* Number of temp. registers required for the shader to run, i.e. the largest temp register * index that will be used in the output bytecode (+1). */ uint32_t temp_count; diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c index 26386c0b..da07b383 100644 --- a/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d-shader/hlsl_codegen.c @@ -4845,6 +4845,43 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) list_move_tail(&ctx->extern_vars, &sorted); } +/* In SM2, 'sincos' expects specific constants as src1 and src2 arguments. + * These have to be referenced directly, i.e. as 'c' not 'r'. 
*/ +static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct register_allocator *allocator) +{ + const struct hlsl_ir_node *instr; + struct hlsl_type *type; + + if (ctx->profile->major_version >= 3) + return; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->type == HLSL_IR_EXPR && (hlsl_ir_expr(instr)->op == HLSL_OP1_SIN_REDUCED + || hlsl_ir_expr(instr)->op == HLSL_OP1_COS_REDUCED)) + { + type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + + ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); + TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type)); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f); + + ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); + TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type)); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3, 5.00000000e-01f); + + return; + } + } +} + static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { struct register_allocator allocator_used = {0}; @@ -4909,6 +4946,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); + allocate_sincos_const_registers(ctx, &entry_func->body, &allocator); + vkd3d_free(allocator.allocations); }