vkd3d-shader/tpf: Implement HLSL_OP1_RCP.

SM5 comes with a RCP opcode; for SM4, implement it as `DIV dst, 1, x`.
This commit is contained in:
Petrichor Park 2024-04-22 12:24:16 -05:00 committed by Henri Verbeet
parent b3f0cd5788
commit bec4f413dc
Notes: Henri Verbeet 2024-07-11 17:16:15 +02:00
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Giovanni Mascellani (@giomasce)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/822
2 changed files with 51 additions and 13 deletions

View File

@ -5189,6 +5189,44 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex
} }
break; break;
case HLSL_OP1_RCP:
switch (dst_type->e.numeric.type)
{
case HLSL_TYPE_FLOAT:
/* SM5 comes with a RCP opcode */
if (tpf->ctx->profile->major_version >= 5)
{
write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0);
}
else
{
/* For SM4, implement as DIV dst, 1.0, src */
struct sm4_instruction instr;
struct hlsl_constant_value one;
assert(type_is_float(dst_type));
memset(&instr, 0, sizeof(instr));
instr.opcode = VKD3D_SM4_OP_DIV;
sm4_dst_from_node(&instr.dsts[0], &expr->node);
instr.dst_count = 1;
for (unsigned int i = 0; i < 4; i++)
one.u[i].f = 1.0f;
sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask);
sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask);
instr.src_count = 2;
write_sm4_instruction(tpf, &instr);
}
break;
default:
hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer);
}
break;
case HLSL_OP1_REINTERPRET: case HLSL_OP1_REINTERPRET:
write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
break; break;

View File

@ -1,4 +1,4 @@
[pixel shader todo(sm>=4)] [pixel shader]
uniform float4 f; uniform float4 f;
float4 main() : sv_target float4 main() : sv_target
@ -8,28 +8,28 @@ float4 main() : sv_target
[test] [test]
uniform 0 float4 1.0 2.0 3.0 4.0 uniform 0 float4 1.0 2.0 3.0 4.0
todo(sm>=4 & sm<6) draw quad todo(glsl) draw quad
probe (0, 0) rgba (1.0, 0.5, 0.3333333, 0.25) 8 probe (0, 0) rgba (1.0, 0.5, 0.3333333, 0.25) 8
uniform 0 float4 -1.0 -2.0 -3.0 -4.0 uniform 0 float4 -1.0 -2.0 -3.0 -4.0
todo(sm>=4 & sm<6) draw quad todo(glsl) draw quad
probe (0, 0) rgba (-1.0, -0.5, -0.3333333, -0.25) 8 probe (0, 0) rgba (-1.0, -0.5, -0.3333333, -0.25) 8
uniform 0 float4 12.34 56.78 -90.12 -34.56 uniform 0 float4 12.34 56.78 -90.12 -34.56
todo(sm>=4 & sm<6) draw quad todo(glsl) draw quad
probe (0, 0) rgba (0.081037275, 0.017611835, -0.011096316, -0.028935184) 8 probe (0, 0) rgba (0.081037275, 0.017611835, -0.011096316, -0.028935184) 8
uniform 0 float4 0.25 0.125 0.0625 0.3333333 uniform 0 float4 0.25 0.125 0.0625 0.3333333
todo(sm>=4 & sm<6) draw quad todo(glsl) draw quad
probe (0, 0) rgba (4.0, 8.0, 16.0, 3.0) 8 probe (0, 0) rgba (4.0, 8.0, 16.0, 3.0) 8
% Degenerate cases % Degenerate cases
uniform 0 float4 0.0 -0.0 1.0e39 -1.0e39 uniform 0 float4 0.0 -0.0 1.0e39 -1.0e39
todo(sm>=4 & sm<6) draw quad todo(glsl) draw quad
probe (0, 0) rgba (1.0e39, -1.0e39, 0.0, -0.0) 8 probe (0, 0) rgba (1.0e39, -1.0e39, 0.0, -0.0) 8
[pixel shader todo(sm>=4)] [pixel shader]
% Check invertibility % Check invertibility
uniform float4 f; uniform float4 f;
@ -40,19 +40,19 @@ float4 main() : sv_target
[test] [test]
uniform 0 float4 1.0 2.0 3.0 4.0 uniform 0 float4 1.0 2.0 3.0 4.0
todo(sm>=4 & sm<6) draw quad todo(glsl) draw quad
probe (0, 0) rgba (1.0, 2.0, 3.0, 4.0) 8 probe (0, 0) rgba (1.0, 2.0, 3.0, 4.0) 8
uniform 0 float4 7.604 3.1415 2.72 9.876 uniform 0 float4 7.604 3.1415 2.72 9.876
todo(sm>=4 & sm<6) draw quad todo(glsl) draw quad
probe (0, 0) rgba (7.604, 3.1415, 2.72, 9.876) 8 probe (0, 0) rgba (7.604, 3.1415, 2.72, 9.876) 8
uniform 0 float4 0.0 -0.0 1.0e39 -1.0e39 uniform 0 float4 0.0 -0.0 1.0e39 -1.0e39
todo(sm>=4 & sm<6) draw quad todo(glsl) draw quad
probe (0, 0) rgba (0.0, -0.0, 1.0e39, -1.0e39) probe (0, 0) rgba (0.0, -0.0, 1.0e39, -1.0e39)
[pixel shader todo(sm>=4)] [pixel shader]
% Make sure swizzling works % Make sure swizzling works
uniform float4 f; uniform float4 f;
@ -68,9 +68,9 @@ float4 main() : sv_target
[test] [test]
uniform 0 float4 1.0 2.0 3.0 4.0 uniform 0 float4 1.0 2.0 3.0 4.0
todo(sm>=4 & sm<6) draw quad todo(glsl) draw quad
probe (0, 0) rgba (1.0, 0.5, 0.3333333, 0.25) 8 probe (0, 0) rgba (1.0, 0.5, 0.3333333, 0.25) 8
uniform 0 float4 -1.0 -2.0 -3.0 -4.0 uniform 0 float4 -1.0 -2.0 -3.0 -4.0
todo(sm>=4 & sm<6) draw quad todo(glsl) draw quad
probe (0, 0) rgba (-1.0, -0.5, -0.3333333, -0.25) 8 probe (0, 0) rgba (-1.0, -0.5, -0.3333333, -0.25) 8