From b4845b9dcaa0a1075a9d0e400009b1f810170d20 Mon Sep 17 00:00:00 2001
From: Shaun Ren
Date: Wed, 10 Jul 2024 22:47:51 -0400
Subject: [PATCH] vkd3d-shader/hlsl: Implement sin/cos for SM1.

Also introduce HLSL_OP3_MAD.
---
 libs/vkd3d-shader/hlsl.c         |   1 +
 libs/vkd3d-shader/hlsl.h         |   1 +
 libs/vkd3d-shader/hlsl_codegen.c | 104 +++++++++++++++++++++++++++++++
 3 files changed, 106 insertions(+)

diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c
index acf50869..1526d7b0 100644
--- a/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d-shader/hlsl.c
@@ -2849,6 +2849,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
         [HLSL_OP3_CMP] = "cmp",
         [HLSL_OP3_DP2ADD] = "dp2add",
         [HLSL_OP3_TERNARY] = "ternary",
+        [HLSL_OP3_MAD] = "mad",
     };
 
     return op_names[op];
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
index 0f78f0b5..cd6cf91e 100644
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -699,6 +699,7 @@ enum hlsl_ir_expr_op
      * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */
     HLSL_OP3_CMP,
     HLSL_OP3_TERNARY,
+    HLSL_OP3_MAD,
 };
 
 #define HLSL_MAX_OPERANDS 3
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 02884df9..c96a70b4 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -20,6 +20,7 @@
 
 #include "hlsl.h"
 #include <stdio.h>
+#include <math.h>
 
 /* TODO: remove when no longer needed, only used for new_offset_instr_from_deref() */
 static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, struct hlsl_block *block,
@@ -3016,6 +3017,108 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct
     return true;
 }
 
+/* Lower SIN/COS to range-reduced, per-component SIN_REDUCED/COS_REDUCED expressions for SM1. */
+static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
+{
+    struct hlsl_ir_node *arg, *half, *two_pi, *reciprocal_two_pi, *neg_pi;
+    struct hlsl_constant_value half_value, two_pi_value, reciprocal_two_pi_value, neg_pi_value;
+    struct hlsl_ir_node *mad, *frc, *reduced;
+    struct hlsl_type *type;
+    struct hlsl_ir_expr *expr;
+    enum hlsl_ir_expr_op op;
+    struct hlsl_ir_node *sincos;
+    int i;
+
+    if (instr->type != HLSL_IR_EXPR)
+        return false;
+    expr = hlsl_ir_expr(instr);
+
+    if (expr->op == HLSL_OP1_SIN)
+        op = HLSL_OP1_SIN_REDUCED;
+    else if (expr->op == HLSL_OP1_COS)
+        op = HLSL_OP1_COS_REDUCED;
+    else
+        return false;
+
+    arg = expr->operands[0].node;
+    type = arg->data_type;
+
+    /* Reduce the range of the input angles to [-pi, pi], i.e. frac(x / (2 * pi) + 0.5) * 2 * pi - pi. */
+    for (i = 0; i < type->dimx; ++i)
+    {
+        half_value.u[i].f = 0.5;
+        two_pi_value.u[i].f = 2.0 * M_PI;
+        reciprocal_two_pi_value.u[i].f = 1.0 / (2.0 * M_PI);
+        neg_pi_value.u[i].f = -M_PI;
+    }
+
+    if (!(half = hlsl_new_constant(ctx, type, &half_value, &instr->loc))
+            || !(two_pi = hlsl_new_constant(ctx, type, &two_pi_value, &instr->loc))
+            || !(reciprocal_two_pi = hlsl_new_constant(ctx, type, &reciprocal_two_pi_value, &instr->loc))
+            || !(neg_pi = hlsl_new_constant(ctx, type, &neg_pi_value, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, half);
+    hlsl_block_add_instr(block, two_pi);
+    hlsl_block_add_instr(block, reciprocal_two_pi);
+    hlsl_block_add_instr(block, neg_pi);
+
+    if (!(mad = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, arg, reciprocal_two_pi, half)))
+        return false;
+    hlsl_block_add_instr(block, mad);
+    if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mad, &instr->loc)))
+        return false;
+    hlsl_block_add_instr(block, frc);
+    if (!(reduced = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, frc, two_pi, neg_pi)))
+        return false;
+    hlsl_block_add_instr(block, reduced);
+
+    if (type->dimx == 1)
+    {
+        if (!(sincos = hlsl_new_unary_expr(ctx, op, reduced, &instr->loc)))
+            return false;
+        hlsl_block_add_instr(block, sincos);
+    }
+    else
+    {
+        struct hlsl_ir_node *comps[4] = {0};
+        struct hlsl_ir_var *var;
+        struct hlsl_deref var_deref;
+        struct hlsl_ir_load *var_load;
+
+        for (i = 0; i < type->dimx; ++i)
+        {
+            uint32_t s = hlsl_swizzle_from_writemask(1 << i);
+
+            if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, reduced, &instr->loc)))
+                return false;
+            hlsl_block_add_instr(block, comps[i]);
+        }
+
+        if (!(var = hlsl_new_synthetic_var(ctx, "sincos", type, &instr->loc)))
+            return false;
+        hlsl_init_simple_deref_from_var(&var_deref, var);
+
+        for (i = 0; i < type->dimx; ++i)
+        {
+            struct hlsl_block store_block;
+
+            if (!(sincos = hlsl_new_unary_expr(ctx, op, comps[i], &instr->loc)))
+                return false;
+            hlsl_block_add_instr(block, sincos);
+
+            if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, sincos))
+                return false;
+            hlsl_block_add_block(block, &store_block);
+        }
+
+        if (!(var_load = hlsl_new_load_index(ctx, &var_deref, NULL, &instr->loc)))
+            return false;
+        hlsl_block_add_instr(block, &var_load->node);
+    }
+
+    return true;
+}
+
 static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 {
     struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS];
@@ -6050,6 +6153,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
         lower_ir(ctx, lower_round, body);
         lower_ir(ctx, lower_ceil, body);
         lower_ir(ctx, lower_floor, body);
+        lower_ir(ctx, lower_trig, body);
         lower_ir(ctx, lower_comparison_operators, body);
         lower_ir(ctx, lower_logic_not, body);
         if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)