mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2025-12-15 08:03:30 -08:00
vkd3d-shader/hlsl: Implement the firstbitlow() intrinsic.
This commit is contained in:
committed by
Henri Verbeet
parent
e6d840170d
commit
e35604dbf0
Notes:
Henri Verbeet
2025-09-22 11:46:20 +02:00
Approved-by: Francisco Casas (@fcasas) Approved-by: Elizabeth Figura (@zfigura) Approved-by: Henri Verbeet (@hverbeet) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/965
@@ -173,6 +173,7 @@ AS_IF([test "x$with_xcb" != "xno"],
|
||||
|
||||
dnl Check for functions
|
||||
VKD3D_CHECK_FUNC([HAVE_BUILTIN_CLZ], [__builtin_clz], [__builtin_clz(0)])
|
||||
VKD3D_CHECK_FUNC([HAVE_BUILTIN_CTZ], [__builtin_ctz], [__builtin_ctz(0)])
|
||||
VKD3D_CHECK_FUNC([HAVE_BUILTIN_POPCOUNT], [__builtin_popcount], [__builtin_popcount(0)])
|
||||
VKD3D_CHECK_FUNC([HAVE_BUILTIN_ADD_OVERFLOW], [__builtin_add_overflow], [__builtin_add_overflow(0, 0, (int *)0)])
|
||||
VKD3D_CHECK_FUNC([HAVE_SYNC_ADD_AND_FETCH], [__sync_add_and_fetch], [__sync_add_and_fetch((int *)0, 0)])
|
||||
|
||||
@@ -343,6 +343,24 @@ static inline unsigned int vkd3d_log2i(unsigned int x)
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Return the zero-based index of the least significant set bit of "v".
 *
 * When HAVE_BUILTIN_CTZ is defined the compiler builtin is used; otherwise
 * the index is found with a mask-based binary search. The two paths only
 * agree for non-zero input: __builtin_ctz(0) has an undefined result, while
 * the fallback yields 31. Callers are expected to handle zero themselves. */
static inline unsigned int vkd3d_ctz(uint32_t v)
{
#ifdef HAVE_BUILTIN_CTZ
    return __builtin_ctz(v);
#else
    /* Start with all five index bits set and clear them one at a time. */
    unsigned int index = 31;

    /* Isolate the lowest set bit; everything above it is discarded. */
    v &= -v;

    /* Each mask selects the bit positions whose index has the
     * corresponding binary digit clear. */
    if (v & 0x0000ffff)
        index -= 16;
    if (v & 0x00ff00ff)
        index -= 8;
    if (v & 0x0f0f0f0f)
        index -= 4;
    if (v & 0x33333333)
        index -= 2;
    if (v & 0x55555555)
        index -= 1;

    return index;
#endif
}
|
||||
|
||||
static inline void *vkd3d_memmem( const void *haystack, size_t haystack_len, const void *needle, size_t needle_len)
|
||||
{
|
||||
const char *str = haystack;
|
||||
|
||||
@@ -3733,6 +3733,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
|
||||
[HLSL_OP1_COS] = "cos",
|
||||
[HLSL_OP1_COS_REDUCED] = "cos_reduced",
|
||||
[HLSL_OP1_COUNTBITS] = "countbits",
|
||||
[HLSL_OP1_CTZ] = "ctz",
|
||||
[HLSL_OP1_DSX] = "dsx",
|
||||
[HLSL_OP1_DSX_COARSE] = "dsx_coarse",
|
||||
[HLSL_OP1_DSX_FINE] = "dsx_fine",
|
||||
|
||||
@@ -722,6 +722,7 @@ enum hlsl_ir_expr_op
|
||||
HLSL_OP1_COS,
|
||||
HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi], writes to .x */
|
||||
HLSL_OP1_COUNTBITS,
|
||||
HLSL_OP1_CTZ,
|
||||
HLSL_OP1_DSX,
|
||||
HLSL_OP1_DSX_COARSE,
|
||||
HLSL_OP1_DSX_FINE,
|
||||
|
||||
@@ -4005,6 +4005,24 @@ static bool intrinsic_firstbithigh(struct hlsl_ctx *ctx,
|
||||
return add_expr(ctx, params->instrs, HLSL_OP3_TERNARY, operands, type, loc);
|
||||
}
|
||||
|
||||
static bool intrinsic_firstbitlow(struct hlsl_ctx *ctx,
|
||||
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
|
||||
{
|
||||
struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
|
||||
struct hlsl_type *type;
|
||||
|
||||
if (hlsl_version_lt(ctx, 4, 0))
|
||||
hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
|
||||
"The 'firstbitlow' intrinsic requires shader model 4.0 or higher.");
|
||||
|
||||
if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc))
|
||||
return false;
|
||||
type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT);
|
||||
|
||||
operands[0] = params->args[0];
|
||||
return add_expr(ctx, params->instrs, HLSL_OP1_CTZ, operands, type, loc);
|
||||
}
|
||||
|
||||
static bool intrinsic_floor(struct hlsl_ctx *ctx,
|
||||
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
|
||||
{
|
||||
@@ -5416,6 +5434,7 @@ intrinsic_functions[] =
|
||||
{"f32tof16", 1, true, intrinsic_f32tof16},
|
||||
{"faceforward", 3, true, intrinsic_faceforward},
|
||||
{"firstbithigh", 1, true, intrinsic_firstbithigh},
|
||||
{"firstbitlow", 1, true, intrinsic_firstbitlow},
|
||||
{"floor", 1, true, intrinsic_floor},
|
||||
{"fmod", 2, true, intrinsic_fmod},
|
||||
{"frac", 1, true, intrinsic_frac},
|
||||
|
||||
@@ -11077,6 +11077,12 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
|
||||
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_COUNTBITS, 0, 0, true);
|
||||
return true;
|
||||
|
||||
case HLSL_OP1_CTZ:
|
||||
VKD3D_ASSERT(hlsl_type_is_integer(dst_type));
|
||||
VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
|
||||
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_FIRSTBIT_LO, 0, 0, true);
|
||||
return true;
|
||||
|
||||
case HLSL_OP1_DSX:
|
||||
VKD3D_ASSERT(type_is_float(dst_type));
|
||||
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_DSX, 0, 0, true);
|
||||
@@ -14157,6 +14163,57 @@ static bool lower_countbits(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, str
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool lower_ctz(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
|
||||
{
|
||||
struct hlsl_ir_function_decl *func;
|
||||
struct hlsl_ir_node *call, *rhs;
|
||||
struct hlsl_ir_expr *expr;
|
||||
struct hlsl_ir_var *lhs;
|
||||
char *body;
|
||||
|
||||
/* ctz() returns the bit number of the least significant 1-bit.
|
||||
* Bit numbers count from the least significant bit. */
|
||||
static const char template[] =
|
||||
"typedef uint%u uintX;\n"
|
||||
"uintX ctz(uintX v)\n"
|
||||
"{\n"
|
||||
" uintX c = 31;\n"
|
||||
" v &= -v;\n"
|
||||
" c = (v & 0x0000ffff) ? c - 16 : c;\n"
|
||||
" c = (v & 0x00ff00ff) ? c - 8 : c;\n"
|
||||
" c = (v & 0x0f0f0f0f) ? c - 4 : c;\n"
|
||||
" c = (v & 0x33333333) ? c - 2 : c;\n"
|
||||
" c = (v & 0x55555555) ? c - 1 : c;\n"
|
||||
" return v ? c : -1;\n"
|
||||
"}\n";
|
||||
|
||||
if (node->type != HLSL_IR_EXPR)
|
||||
return false;
|
||||
|
||||
expr = hlsl_ir_expr(node);
|
||||
if (expr->op != HLSL_OP1_CTZ)
|
||||
return false;
|
||||
|
||||
rhs = expr->operands[0].node;
|
||||
if (!(body = hlsl_sprintf_alloc(ctx, template, hlsl_type_component_count(rhs->data_type))))
|
||||
return false;
|
||||
func = hlsl_compile_internal_function(ctx, "ctz", body);
|
||||
vkd3d_free(body);
|
||||
if (!func)
|
||||
return false;
|
||||
|
||||
lhs = func->parameters.vars[0];
|
||||
hlsl_block_add_simple_store(ctx, block, lhs, rhs);
|
||||
|
||||
if (!(call = hlsl_new_call(ctx, func, &node->loc)))
|
||||
return false;
|
||||
hlsl_block_add_instr(block, call);
|
||||
|
||||
hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
|
||||
{
|
||||
struct hlsl_ir_function_decl *func;
|
||||
@@ -14479,6 +14536,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v
|
||||
if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0))
|
||||
{
|
||||
lower_ir(ctx, lower_countbits, body);
|
||||
lower_ir(ctx, lower_ctz, body);
|
||||
lower_ir(ctx, lower_f16tof32, body);
|
||||
lower_ir(ctx, lower_f32tof16, body);
|
||||
lower_ir(ctx, lower_find_msb, body);
|
||||
|
||||
@@ -334,6 +334,34 @@ static bool fold_countbits(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool fold_ctz(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
|
||||
const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src)
|
||||
{
|
||||
enum hlsl_base_type type = dst_type->e.numeric.type;
|
||||
unsigned int k;
|
||||
|
||||
VKD3D_ASSERT(type == src->node.data_type->e.numeric.type);
|
||||
|
||||
for (k = 0; k < dst_type->e.numeric.dimx; ++k)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case HLSL_TYPE_UINT:
|
||||
if (!src->value.u[k].u)
|
||||
dst->u[k].u = ~0u;
|
||||
else
|
||||
dst->u[k].u = vkd3d_ctz(src->value.u[k].u);
|
||||
break;
|
||||
|
||||
default:
|
||||
FIXME("Fold 'ctz' for type %s.\n", debug_hlsl_type(ctx, dst_type));
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
|
||||
const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src)
|
||||
{
|
||||
@@ -1469,6 +1497,10 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
|
||||
success = fold_countbits(ctx, &res, instr->data_type, arg1);
|
||||
break;
|
||||
|
||||
case HLSL_OP1_CTZ:
|
||||
success = fold_ctz(ctx, &res, instr->data_type, arg1);
|
||||
break;
|
||||
|
||||
case HLSL_OP1_EXP2:
|
||||
success = fold_exp2(ctx, &res, instr->data_type, arg1);
|
||||
break;
|
||||
|
||||
@@ -4275,6 +4275,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
|
||||
case VSIR_OP_F16TOF32:
|
||||
case VSIR_OP_F32TOF16:
|
||||
case VSIR_OP_FIRSTBIT_HI:
|
||||
case VSIR_OP_FIRSTBIT_LO:
|
||||
case VSIR_OP_FIRSTBIT_SHI:
|
||||
case VSIR_OP_FRC:
|
||||
case VSIR_OP_FTOI:
|
||||
|
||||
@@ -254,7 +254,7 @@ probe (0, 0) rgba (0.0, 1.0, 1.0, 0.0)
|
||||
format r32g32b32a32-uint
|
||||
size (2d, 640, 480)
|
||||
|
||||
[pixel shader todo]
|
||||
[pixel shader]
|
||||
uint4 u;
|
||||
|
||||
uint4 main() : sv_target
|
||||
@@ -264,10 +264,10 @@ uint4 main() : sv_target
|
||||
|
||||
[test]
|
||||
uniform 0 uint4 0 0xffffffff 0x00001000 0x00760400
|
||||
todo(sm<6 | msl & sm>=6) draw quad
|
||||
todo(msl & sm>=6) draw quad
|
||||
probe (0, 0) u32(0xffffffff, 0, 12, 10)
|
||||
|
||||
[pixel shader todo]
|
||||
[pixel shader]
|
||||
uint4 main(float4 pos : sv_position) : sv_target
|
||||
{
|
||||
uint4x4 umat =
|
||||
@@ -282,7 +282,7 @@ uint4 main(float4 pos : sv_position) : sv_target
|
||||
}
|
||||
|
||||
[test]
|
||||
todo(sm<6 | msl & sm>=6) draw quad
|
||||
todo(msl & sm>=6) draw quad
|
||||
probe(0, 0) u32(0, 1, 0, 2)
|
||||
probe(1, 0) u32(0, 1, 0, 3)
|
||||
probe(2, 0) u32(0, 1, 0, 2)
|
||||
|
||||
Reference in New Issue
Block a user