vkd3d-shader/hlsl: Add missing src1 and src2 constants to sincos on SM2.

The sincos instruction expects two specific constants on 2.0 and 2.1 profiles.

Consider the following shader:

    uniform float u;
    float4 main() : sv_target
    {
        return sin(u);
    }

On native, with ps_2_0, this compiles as:

    ps_2_0
    def c3, 0.159154937, 0.5, 6.28318548, -3.14159274
    def c1, -1.55009923e-006, -2.17013894e-005, 0.00260416674, 0.00026041668
    def c2, -0.020833334, -0.125, 1, 0.5
    mov r0.xy, c3
    mad r0.x, c0.x, r0.x, r0.y
    frc r0.x, r0.x
    mad r0.x, r0.x, c3.z, c3.w
    sincos r1.y, r0.x, c1, c2
    mov r0, r1.y
    mov oC0, r0

Before this patch, we were not emitting the src1 and src2 constant
arguments.
This commit is contained in:
Francisco Casas 2024-07-23 17:12:53 -04:00 committed by Henri Verbeet
parent 9f4a568868
commit e6e82ad3f6
Notes: Henri Verbeet 2024-07-31 22:42:18 +02:00
Approved-by: Giovanni Mascellani (@giomasce)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/961
3 changed files with 70 additions and 1 deletions

View File

@ -2374,12 +2374,40 @@ static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc,
/* Write a single D3DSIO_SINCOS instruction computing a reduced sine or cosine.
 *
 * d3dbc: compiler state; its profile's major version selects the operand layout.
 * op:    HLSL_OP1_COS_REDUCED or HLSL_OP1_SIN_REDUCED.
 * dst:   destination temp register. Its writemask must be VKD3DSP_WRITEMASK_0
 *        for the cosine and VKD3DSP_WRITEMASK_1 for the sine.
 * src:   source temp register holding the angle.
 *
 * For profiles with a major version below 3, the instruction additionally
 * takes two constant registers as src1 and src2; these are read from
 * ctx->d3dsincosconst1 and ctx->d3dsincosconst2. */
static void d3dbc_write_sincos(struct d3dbc_compiler *d3dbc, enum hlsl_ir_expr_op op,
        const struct hlsl_reg *dst, const struct hlsl_reg *src)
{
    struct sm1_instruction instr =
    {
        .opcode = D3DSIO_SINCOS,

        .dst.type = VKD3DSPR_TEMP,
        .dst.writemask = dst->writemask,
        .dst.reg = dst->id,
        .has_dst = 1,

        .srcs[0].type = VKD3DSPR_TEMP,
        .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask),
        .srcs[0].reg = src->id,
        .src_count = 1,
    };

    if (op == HLSL_OP1_COS_REDUCED)
        VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_0);
    else /* HLSL_OP1_SIN_REDUCED */
        VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_1);

    if (d3dbc->ctx->profile->major_version < 3)
    {
        /* Before SM3, sincos needs the two constant vectors as extra sources. */
        instr.src_count = 3;

        instr.srcs[1].type = VKD3DSPR_CONST;
        instr.srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL);
        instr.srcs[1].reg = d3dbc->ctx->d3dsincosconst1.id;

        instr.srcs[2].type = VKD3DSPR_CONST;
        instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL);
        instr.srcs[2].reg = d3dbc->ctx->d3dsincosconst2.id;
    }

    /* Emit the instruction exactly once, after all operands are filled in. */
    d3dbc_write_instruction(d3dbc, &instr);
}
static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)

View File

@ -1001,6 +1001,8 @@ struct hlsl_ctx
} *regs;
size_t count, size;
} constant_defs;
/* 'c' registers where the constants expected by SM2 sincos are stored. */
struct hlsl_reg d3dsincosconst1, d3dsincosconst2;
/* Number of temp. registers required for the shader to run, i.e. the largest temp register
* index that will be used in the output bytecode (+1). */
uint32_t temp_count;

View File

@ -4845,6 +4845,43 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx)
list_move_tail(&ctx->extern_vars, &sorted);
}
/* In SM2, 'sincos' expects specific constants as src1 and src2 arguments.
* These have to be referenced directly, i.e. as 'c' not 'r'. */
static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *block,
struct register_allocator *allocator)
{
const struct hlsl_ir_node *instr;
struct hlsl_type *type;
if (ctx->profile->major_version >= 3)
return;
LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
{
if (instr->type == HLSL_IR_EXPR && (hlsl_ir_expr(instr)->op == HLSL_OP1_SIN_REDUCED
|| hlsl_ir_expr(instr)->op == HLSL_OP1_COS_REDUCED))
{
type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4);
ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type));
record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f);
record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f);
record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f);
record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f);
ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type));
record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f);
record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f);
record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f);
record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3, 5.00000000e-01f);
return;
}
}
}
static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
{
struct register_allocator allocator_used = {0};
@ -4909,6 +4946,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
allocate_const_registers_recurse(ctx, &entry_func->body, &allocator);
allocate_sincos_const_registers(ctx, &entry_func->body, &allocator);
vkd3d_free(allocator.allocations);
}