diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c index a7c37215..a3938d32 100644 --- a/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d-shader/tpf.c @@ -5189,6 +5189,44 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex } break; + case HLSL_OP1_RCP: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + /* SM5 and later have a native RCP opcode, so emit it directly. */ + if (tpf->ctx->profile->major_version >= 5) + { + write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); + } + else + { + /* Below SM5, emit DIV dst, 1.0, src; the 1.0 constant is set in all four components. */ + struct sm4_instruction instr; + struct hlsl_constant_value one; + + assert(type_is_float(dst_type)); + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_DIV; + + sm4_dst_from_node(&instr.dsts[0], &expr->node); + instr.dst_count = 1; + + for (unsigned int i = 0; i < 4; i++) + one.u[i].f = 1.0f; + sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask); + sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask); + instr.src_count = 2; + + write_sm4_instruction(tpf, &instr); + } + break; + + default: + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer); + } + break; + case HLSL_OP1_REINTERPRET: write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); break; diff --git a/tests/hlsl/rcp.shader_test b/tests/hlsl/rcp.shader_test index 65cf1a81..3fb1185f 100644 --- a/tests/hlsl/rcp.shader_test +++ b/tests/hlsl/rcp.shader_test @@ -1,4 +1,4 @@ -[pixel shader todo(sm>=4)] +[pixel shader] uniform float4 f; float4 main() : sv_target @@ -8,28 +8,28 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 -todo(sm>=4 & sm<6) draw quad +todo(glsl) draw quad probe (0, 0) rgba (1.0, 0.5, 0.3333333, 0.25) 8 uniform 0 float4 -1.0 -2.0 -3.0 -4.0 -todo(sm>=4 & sm<6) draw quad +todo(glsl) draw quad probe (0, 0) rgba (-1.0, -0.5, -0.3333333, -0.25) 8 uniform 0 float4 12.34 56.78 -90.12 -34.56 -todo(sm>=4 & 
sm<6) draw quad +todo(glsl) draw quad probe (0, 0) rgba (0.081037275, 0.017611835, -0.011096316, -0.028935184) 8 uniform 0 float4 0.25 0.125 0.0625 0.3333333 -todo(sm>=4 & sm<6) draw quad +todo(glsl) draw quad probe (0, 0) rgba (4.0, 8.0, 16.0, 3.0) 8 % Degenerate cases uniform 0 float4 0.0 -0.0 1.0e39 -1.0e39 -todo(sm>=4 & sm<6) draw quad +todo(glsl) draw quad probe (0, 0) rgba (1.0e39, -1.0e39, 0.0, -0.0) 8 -[pixel shader todo(sm>=4)] +[pixel shader] % Check invertibility uniform float4 f; @@ -40,19 +40,19 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 -todo(sm>=4 & sm<6) draw quad +todo(glsl) draw quad probe (0, 0) rgba (1.0, 2.0, 3.0, 4.0) 8 uniform 0 float4 7.604 3.1415 2.72 9.876 -todo(sm>=4 & sm<6) draw quad +todo(glsl) draw quad probe (0, 0) rgba (7.604, 3.1415, 2.72, 9.876) 8 uniform 0 float4 0.0 -0.0 1.0e39 -1.0e39 -todo(sm>=4 & sm<6) draw quad +todo(glsl) draw quad probe (0, 0) rgba (0.0, -0.0, 1.0e39, -1.0e39) -[pixel shader todo(sm>=4)] +[pixel shader] % Make sure swizzling works uniform float4 f; @@ -68,9 +68,9 @@ float4 main() : sv_target [test] uniform 0 float4 1.0 2.0 3.0 4.0 -todo(sm>=4 & sm<6) draw quad +todo(glsl) draw quad probe (0, 0) rgba (1.0, 0.5, 0.3333333, 0.25) 8 uniform 0 float4 -1.0 -2.0 -3.0 -4.0 -todo(sm>=4 & sm<6) draw quad +todo(glsl) draw quad probe (0, 0) rgba (-1.0, -0.5, -0.3333333, -0.25) 8