/* Patch summary: for shader profiles whose major version is below 3, D3DSIO_SINCOS is written
 * with two extra constant-register sources. Their registers are kept in the new hlsl_ctx fields
 * d3dsincosconst1 / d3dsincosconst2, which allocate_sincos_const_registers() allocates and fills
 * when the entry function body contains HLSL_OP1_SIN_REDUCED or HLSL_OP1_COS_REDUCED. */
diff --git a/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d-shader/d3dbc.c
index 3bf8368b..0791c843 100644
--- a/libs/vkd3d-shader/d3dbc.c
+++ b/libs/vkd3d-shader/d3dbc.c
@@ -2374,12 +2374,40 @@ static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc,
 static void d3dbc_write_sincos(struct d3dbc_compiler *d3dbc, enum hlsl_ir_expr_op op,
         const struct hlsl_reg *dst, const struct hlsl_reg *src)
 {
+    struct sm1_instruction instr = /* Built locally so that more sources can be appended below. */
+    {
+        .opcode = D3DSIO_SINCOS,
+
+        .dst.type = VKD3DSPR_TEMP,
+        .dst.writemask = dst->writemask,
+        .dst.reg = dst->id,
+        .has_dst = 1,
+
+        .srcs[0].type = VKD3DSPR_TEMP,
+        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask),
+        .srcs[0].reg = src->id,
+        .src_count = 1, /* Raised to 3 below when the profile major version is < 3. */
+    };
+
     if (op == HLSL_OP1_COS_REDUCED)
         VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_0);
     else /* HLSL_OP1_SIN_REDUCED */
         VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_1);
 
-    d3dbc_write_unary_op(d3dbc, D3DSIO_SINCOS, dst, src, 0, 0);
+    if (d3dbc->ctx->profile->major_version < 3) /* These profiles take two extra constant sources. */
+    {
+        instr.src_count = 3;
+
+        instr.srcs[1].type = VKD3DSPR_CONST;
+        instr.srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL);
+        instr.srcs[1].reg = d3dbc->ctx->d3dsincosconst1.id; /* Set by allocate_sincos_const_registers(). */
+
+        instr.srcs[2].type = VKD3DSPR_CONST;
+        instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL);
+        instr.srcs[2].reg = d3dbc->ctx->d3dsincosconst2.id; /* Set by allocate_sincos_const_registers(). */
+    }
+
+    d3dbc_write_instruction(d3dbc, &instr);
 }
 
 static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
index d08dd4f2..ca4db6c7 100644
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -1001,6 +1001,8 @@ struct hlsl_ctx
         } *regs;
         size_t count, size;
     } constant_defs;
+    /* 'c' registers where the constants expected by SM2 sincos are stored. */
+    struct hlsl_reg d3dsincosconst1, d3dsincosconst2;
     /* Number of temp. registers required for the shader to run, i.e. the largest temp register
      * index that will be used in the output bytecode (+1). */
     uint32_t temp_count;
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 26386c0b..da07b383 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -4845,6 +4845,43 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx)
     list_move_tail(&ctx->extern_vars, &sorted);
 }
 
+/* In SM2, 'sincos' expects specific constants as src1 and src2 arguments.
+ * These have to be referenced directly, i.e. as 'c' not 'r'. */
+static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *block,
+        struct register_allocator *allocator)
+{
+    const struct hlsl_ir_node *instr;
+    struct hlsl_type *type;
+
+    if (ctx->profile->major_version >= 3) /* Nothing to allocate for major version 3 and above. */
+        return;
+
+    /* NOTE(review): only block->instrs itself is scanned; nested blocks are not visited here - confirm intended. */
+    LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
+    {
+        if (instr->type == HLSL_IR_EXPR && (hlsl_ir_expr(instr)->op == HLSL_OP1_SIN_REDUCED
+                || hlsl_ir_expr(instr)->op == HLSL_OP1_COS_REDUCED))
+        {
+            type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); /* Shared by both allocations below. */
+
+            ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
+            TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type));
+            record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f); /* presumably index = reg * 4 + component; verify */
+            record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f);
+            record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2,  2.60416674e-03f);
+            record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3,  2.60416680e-04f);
+
+            ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
+            TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type));
+            record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f);
+            record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f);
+            record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2,  1.00000000e+00f);
+            record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3,  5.00000000e-01f);
+
+            return; /* The first match suffices: both registers are allocated only once. */
+        }
+    }
+}
+
 static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
 {
     struct register_allocator allocator_used = {0};
@@ -4909,6 +4946,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
 
     allocate_const_registers_recurse(ctx, &entry_func->body, &allocator);
 
+    allocate_sincos_const_registers(ctx, &entry_func->body, &allocator); /* Same body and allocator as the call above. */
+
     vkd3d_free(allocator.allocations);
 }
 