vkd3d-shader/hlsl: Implement the firstbithigh() intrinsic.

This commit is contained in:
Petrichor Park
2024-07-29 13:12:09 -05:00
committed by Henri Verbeet
parent e49beca0d5
commit e6d840170d
Notes: Henri Verbeet 2025-09-22 11:46:20 +02:00
Approved-by: Francisco Casas (@fcasas)
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/965
7 changed files with 192 additions and 7 deletions

View File

@@ -393,6 +393,11 @@ bool hlsl_type_is_signed_integer(const struct hlsl_type *type)
vkd3d_unreachable();
}
bool hlsl_type_is_unsigned_integer(const struct hlsl_type *type)
{
return hlsl_type_is_integer(type) && !hlsl_type_is_signed_integer(type);
}
bool hlsl_type_is_integer(const struct hlsl_type *type)
{
VKD3D_ASSERT(hlsl_is_numeric_type(type));
@@ -3724,6 +3729,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
[HLSL_OP1_BIT_NOT] = "~",
[HLSL_OP1_CAST] = "cast",
[HLSL_OP1_CEIL] = "ceil",
[HLSL_OP1_CLZ] = "clz",
[HLSL_OP1_COS] = "cos",
[HLSL_OP1_COS_REDUCED] = "cos_reduced",
[HLSL_OP1_COUNTBITS] = "countbits",
@@ -3736,6 +3742,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
[HLSL_OP1_EXP2] = "exp2",
[HLSL_OP1_F16TOF32] = "f16tof32",
[HLSL_OP1_F32TOF16] = "f32tof16",
[HLSL_OP1_FIND_MSB] = "find_msb",
[HLSL_OP1_FLOOR] = "floor",
[HLSL_OP1_FRACT] = "fract",
[HLSL_OP1_ISINF] = "isinf",

View File

@@ -718,6 +718,7 @@ enum hlsl_ir_expr_op
HLSL_OP1_BIT_NOT,
HLSL_OP1_CAST,
HLSL_OP1_CEIL,
HLSL_OP1_CLZ,
HLSL_OP1_COS,
HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi], writes to .x */
HLSL_OP1_COUNTBITS,
@@ -730,6 +731,7 @@ enum hlsl_ir_expr_op
HLSL_OP1_EXP2,
HLSL_OP1_F16TOF32,
HLSL_OP1_F32TOF16,
HLSL_OP1_FIND_MSB,
HLSL_OP1_FLOOR,
HLSL_OP1_FRACT,
HLSL_OP1_ISINF,
@@ -1792,6 +1794,7 @@ bool hlsl_type_is_integer(const struct hlsl_type *type);
bool hlsl_type_is_floating_point(const struct hlsl_type *type);
bool hlsl_type_is_row_major(const struct hlsl_type *type);
bool hlsl_type_is_signed_integer(const struct hlsl_type *type);
bool hlsl_type_is_unsigned_integer(const struct hlsl_type *type);
unsigned int hlsl_type_minor_size(const struct hlsl_type *type);
unsigned int hlsl_type_major_size(const struct hlsl_type *type);
unsigned int hlsl_type_element_count(const struct hlsl_type *type);

View File

@@ -3181,6 +3181,20 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx,
return true;
}
/* Convert all arguments of an elementwise intrinsic to the HLSL_TYPE_INT
 * numeric type that has the class and dimensions of the arguments' common
 * type. Returns false if no common type could be determined. */
static bool elementwise_intrinsic_int_convert_args(struct hlsl_ctx *ctx,
        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
    struct hlsl_type *common, *int_type;

    common = elementwise_intrinsic_get_common_type(ctx, params, loc);
    if (!common)
        return false;

    /* Keep the shape of the common type, but force the base type to int. */
    int_type = hlsl_get_numeric_type(ctx, common->class, HLSL_TYPE_INT,
            common->e.numeric.dimx, common->e.numeric.dimy);
    convert_args(ctx, params, int_type, loc);

    return true;
}
static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
@@ -3939,6 +3953,58 @@ static bool intrinsic_f32tof16(struct hlsl_ctx *ctx,
return add_expr(ctx, params->instrs, HLSL_OP1_F32TOF16, operands, type, loc);
}
static bool intrinsic_firstbithigh(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
struct hlsl_type *type = params->args[0]->data_type;
struct hlsl_ir_node *c, *clz, *eq, *xor;
struct hlsl_constant_value v;
if (hlsl_version_lt(ctx, 4, 0))
hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
"The 'firstbithigh' intrinsic requires shader model 4.0 or higher.");
if (hlsl_type_is_unsigned_integer(type))
{
if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc))
return false;
}
else
{
if (!elementwise_intrinsic_int_convert_args(ctx, params, loc))
return false;
}
type = convert_numeric_type(ctx, type, HLSL_TYPE_UINT);
operands[0] = params->args[0];
if (hlsl_version_lt(ctx, 5, 0))
return add_expr(ctx, params->instrs, HLSL_OP1_FIND_MSB, operands, type, loc);
v.u[0].u = 0x1f;
if (!(c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &v, loc)))
return false;
hlsl_block_add_instr(params->instrs, c);
if (!(clz = add_expr(ctx, params->instrs, HLSL_OP1_CLZ, operands, type, loc)))
return false;
if (!(xor = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_BIT_XOR, c, clz, loc)))
return false;
v.u[0].i = -1;
if (!(c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &v, loc)))
return false;
hlsl_block_add_instr(params->instrs, c);
if (!(eq = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_EQUAL, clz, c, loc)))
return false;
operands[0] = eq;
operands[1] = add_implicit_conversion(ctx, params->instrs, c, type, loc);
operands[2] = xor;
return add_expr(ctx, params->instrs, HLSL_OP3_TERNARY, operands, type, loc);
}
static bool intrinsic_floor(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
@@ -5349,6 +5415,7 @@ intrinsic_functions[] =
{"f16tof32", 1, true, intrinsic_f16tof32},
{"f32tof16", 1, true, intrinsic_f32tof16},
{"faceforward", 3, true, intrinsic_faceforward},
{"firstbithigh", 1, true, intrinsic_firstbithigh},
{"floor", 1, true, intrinsic_floor},
{"fmod", 2, true, intrinsic_fmod},
{"frac", 1, true, intrinsic_frac},

View File

@@ -11057,6 +11057,15 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_ROUND_PI, 0, 0, true);
return true;
/* HLSL_OP1_CLZ maps to a single "first bit high" instruction; the signed
 * variant (FIRSTBIT_SHI) is used when the source operand is a signed
 * integer, and the unsigned variant (FIRSTBIT_HI) otherwise. */
case HLSL_OP1_CLZ:
VKD3D_ASSERT(hlsl_type_is_integer(dst_type));
/* This case expects a 5.0+ profile; intrinsic_firstbithigh() emits
 * HLSL_OP1_FIND_MSB instead of HLSL_OP1_CLZ for earlier profiles. */
VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
if (hlsl_type_is_signed_integer(src_type))
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_FIRSTBIT_SHI, 0, 0, true);
else
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_FIRSTBIT_HI, 0, 0, true);
return true;
case HLSL_OP1_COS:
VKD3D_ASSERT(type_is_float(dst_type));
sm4_generate_vsir_expr_with_two_destinations(ctx, program, VSIR_OP_SINCOS, expr, 1);
@@ -14290,6 +14299,69 @@ static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru
return true;
}
static bool lower_find_msb(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
{
struct hlsl_ir_function_decl *func;
struct hlsl_ir_node *call, *rhs;
struct hlsl_ir_expr *expr;
struct hlsl_ir_var *lhs;
char *body;
/* For positive numbers, find_msb() returns the bit number of the most
* significant 1-bit. For negative numbers, it returns the bit number of
* the most significant 0-bit. Bit numbers count from the least
* significant bit. */
static const char template[] =
"typedef %s intX;\n"
"uint%u find_msb(intX v)\n"
"{\n"
" intX c, mask;\n"
" v = v < 0 ? ~v : v;\n"
" mask = v & 0xffff0000;\n"
" v = mask ? mask : v;\n"
" c = mask ? 16 : v ? 0 : -1;\n"
" mask = v & 0xff00ff00;\n"
" v = mask ? mask : v;\n"
" c = mask ? c + 8 : c;\n"
" mask = v & 0xf0f0f0f0;\n"
" v = mask ? mask : v;\n"
" c = mask ? c + 4 : c;\n"
" mask = v & 0xcccccccc;\n"
" v = mask ? mask : v;\n"
" c = mask ? c + 2 : c;\n"
" mask = v & 0xaaaaaaaa;\n"
" v = mask ? mask : v;\n"
" c = mask ? c + 1 : c;\n"
" return c;\n"
"}\n";
if (node->type != HLSL_IR_EXPR)
return false;
expr = hlsl_ir_expr(node);
if (expr->op != HLSL_OP1_FIND_MSB)
return false;
rhs = expr->operands[0].node;
if (!(body = hlsl_sprintf_alloc(ctx, template, rhs->data_type->name, hlsl_type_component_count(rhs->data_type))))
return false;
func = hlsl_compile_internal_function(ctx, "find_msb", body);
vkd3d_free(body);
if (!func)
return false;
lhs = func->parameters.vars[0];
hlsl_block_add_simple_store(ctx, block, lhs, rhs);
if (!(call = hlsl_new_call(ctx, func, &node->loc)))
return false;
hlsl_block_add_instr(block, call);
hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc);
return true;
}
static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
{
struct hlsl_ir_function_decl *func;
@@ -14409,6 +14481,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v
lower_ir(ctx, lower_countbits, body);
lower_ir(ctx, lower_f16tof32, body);
lower_ir(ctx, lower_f32tof16, body);
lower_ir(ctx, lower_find_msb, body);
}
lower_ir(ctx, lower_isinf, body);

View File

@@ -250,6 +250,35 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
return true;
}
/* Constant-fold an HLSL_OP1_CLZ expression.
 *
 * For each of the dst_type->e.numeric.dimx components, the source value is
 * taken as-is if it is unsigned or a non-negative int, and bitwise inverted
 * if it is a negative int. A zero value folds to 0xffffffff; any other value
 * folds to vkd3d_log2i(value) ^ 0x1f (presumably the position of the highest
 * set bit counted from the most significant bit; confirm against
 * vkd3d_log2i()).
 *
 * Returns false, without completing "dst", if the source base type is
 * neither HLSL_TYPE_INT nor HLSL_TYPE_UINT. */
static bool fold_clz(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
        const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src)
{
    enum hlsl_base_type type = src->node.data_type->e.numeric.type;
    unsigned int k, v;

    for (k = 0; k < dst_type->e.numeric.dimx; ++k)
    {
        switch (type)
        {
            case HLSL_TYPE_INT:
                /* Negative values are searched for their highest 0-bit. */
                v = src->value.u[k].i < 0 ? ~src->value.u[k].u : src->value.u[k].u;
                break;

            case HLSL_TYPE_UINT:
                v = src->value.u[k].u;
                break;

            default:
                /* Report the source type: it is the one that was switched on
                 * and found to be unsupported, not the destination type. */
                FIXME("Fold 'clz' for type %s.\n", debug_hlsl_type(ctx, src->node.data_type));
                return false;
        }

        dst->u[k].u = v ? vkd3d_log2i(v) ^ 0x1f : ~0u;
    }

    return true;
}
static bool fold_cos(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src)
{
@@ -1428,6 +1457,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
success = fold_ceil(ctx, &res, instr->data_type, arg1);
break;
case HLSL_OP1_CLZ:
success = fold_clz(ctx, &res, instr->data_type, arg1);
break;
case HLSL_OP1_COS:
success = fold_cos(ctx, &res, instr->data_type, arg1);
break;

View File

@@ -4274,6 +4274,8 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
case VSIR_OP_EXP:
case VSIR_OP_F16TOF32:
case VSIR_OP_F32TOF16:
case VSIR_OP_FIRSTBIT_HI:
case VSIR_OP_FIRSTBIT_SHI:
case VSIR_OP_FRC:
case VSIR_OP_FTOI:
case VSIR_OP_FTOU:

View File

@@ -288,7 +288,7 @@ probe(1, 0) u32(0, 1, 0, 3)
probe(2, 0) u32(0, 1, 0, 2)
probe(3, 0) u32(0, 1, 0, 4)
[pixel shader todo]
[pixel shader]
uint4 u;
uint4 main() : sv_target
@@ -298,10 +298,10 @@ uint4 main() : sv_target
[test]
uniform 0 uint4 0 0xffffffff 0x00001000 0x00760400
todo(sm<6 | msl & sm>=6) draw quad
todo(msl & sm>=6) draw quad
probe (0, 0) u32(0xffffffff, 31, 12, 22)
[pixel shader todo]
[pixel shader]
int4 i;
uint4 main() : sv_target
@@ -311,13 +311,13 @@ uint4 main() : sv_target
[test]
uniform 0 uint4 0 0xffffffff 0x00001db4 0xff900000
todo(sm<6 | msl & sm>=6) draw quad
todo(msl & sm>=6) draw quad
probe (0, 0) i32(-1, -1, 12, 22)
uniform 0 int4 -500 -400 -300 -7604
todo(sm<6 | msl & sm>=6) draw quad
todo(msl & sm>=6) draw quad
probe (0, 0) i32(8, 8, 8, 12)
[pixel shader todo]
[pixel shader]
uint4 main(float4 pos : sv_position) : sv_target
{
uint4x4 umat =
@@ -332,7 +332,7 @@ uint4 main(float4 pos : sv_position) : sv_target
}
[test]
todo(sm<6 | msl & sm>=6) draw quad
todo(msl & sm>=6) draw quad
probe(0, 0) u32(0, 1, 1, 2)
probe(1, 0) u32(2, 2, 2, 3)
probe(2, 0) u32(3, 3, 3, 3)