Files
wine-staging/patches/vkd3d-latest/0005-Updated-vkd3d-to-cb8c851bec08a9483ce52f57c71922c9b14.patch
Alistair Leslie-Hughes 815194acd6 Updated vkd3d-latest patchset
2025-10-01 07:52:38 +10:00

831 lines
34 KiB
Diff

From e19754650a2fae6ebfb0a471026cc5e9683a8237 Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Tue, 23 Sep 2025 07:11:30 +1000
Subject: [PATCH] Updated vkd3d to cb8c851bec08a9483ce52f57c71922c9b145b9d3.
---
libs/vkd3d/include/private/vkd3d_common.h | 18 ++
libs/vkd3d/libs/vkd3d-shader/hlsl.c | 9 +
libs/vkd3d/libs/vkd3d-shader/hlsl.h | 6 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.y | 101 ++++++++++
libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 183 ++++++++++++++++++
.../libs/vkd3d-shader/hlsl_constant_ops.c | 106 ++++++++++
libs/vkd3d/libs/vkd3d-shader/ir.c | 64 ++++++
libs/vkd3d/libs/vkd3d-shader/spirv.c | 20 +-
libs/vkd3d/libs/vkd3d-shader/tpf.c | 4 +
9 files changed, 504 insertions(+), 7 deletions(-)
diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h
index 8b63acf68e1..c2e957a2fea 100644
--- a/libs/vkd3d/include/private/vkd3d_common.h
+++ b/libs/vkd3d/include/private/vkd3d_common.h
@@ -343,6 +343,24 @@ static inline unsigned int vkd3d_log2i(unsigned int x)
#endif
}
+/* Index of the lowest set bit of "v"; the result for v == 0 is not meaningful. */
+static inline unsigned int vkd3d_ctz(uint32_t v)
+{
+#ifdef HAVE_BUILTIN_CTZ
+    return __builtin_ctz(v);
+#else
+    unsigned int bit_index = 31;
+
+    v &= ~v + 1; /* Keep only the lowest set bit. */
+    bit_index -= (v & 0x0000ffff) ? 16 : 0;
+    bit_index -= (v & 0x00ff00ff) ? 8 : 0;
+    bit_index -= (v & 0x0f0f0f0f) ? 4 : 0;
+    bit_index -= (v & 0x33333333) ? 2 : 0;
+    bit_index -= (v & 0x55555555) ? 1 : 0;
+    return bit_index;
+#endif
+}
+
static inline void *vkd3d_memmem( const void *haystack, size_t haystack_len, const void *needle, size_t needle_len)
{
const char *str = haystack;
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
index 5a25efdee75..ec1e27d9496 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
@@ -393,6 +393,17 @@ bool hlsl_type_is_signed_integer(const struct hlsl_type *type)
vkd3d_unreachable();
}
+/* Whether "type" is an integer type that hlsl_type_is_signed_integer() does
+ * not report as signed. Like hlsl_type_is_integer(), this expects a numeric
+ * type. */
+bool hlsl_type_is_unsigned_integer(const struct hlsl_type *type)
+{
+    if (!hlsl_type_is_integer(type))
+        return false;
+
+    return !hlsl_type_is_signed_integer(type);
+}
+
bool hlsl_type_is_integer(const struct hlsl_type *type)
{
VKD3D_ASSERT(hlsl_is_numeric_type(type));
@@ -3724,8 +3729,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
[HLSL_OP1_BIT_NOT] = "~",
[HLSL_OP1_CAST] = "cast",
[HLSL_OP1_CEIL] = "ceil",
+ [HLSL_OP1_CLZ] = "clz",
[HLSL_OP1_COS] = "cos",
[HLSL_OP1_COS_REDUCED] = "cos_reduced",
+ [HLSL_OP1_COUNTBITS] = "countbits",
+ [HLSL_OP1_CTZ] = "ctz",
[HLSL_OP1_DSX] = "dsx",
[HLSL_OP1_DSX_COARSE] = "dsx_coarse",
[HLSL_OP1_DSX_FINE] = "dsx_fine",
@@ -3735,6 +3743,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op)
[HLSL_OP1_EXP2] = "exp2",
[HLSL_OP1_F16TOF32] = "f16tof32",
[HLSL_OP1_F32TOF16] = "f32tof16",
+ [HLSL_OP1_FIND_MSB] = "find_msb",
[HLSL_OP1_FLOOR] = "floor",
[HLSL_OP1_FRACT] = "fract",
[HLSL_OP1_ISINF] = "isinf",
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
index a3e8ccc1e2a..8dbfd062177 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
@@ -718,8 +718,11 @@ enum hlsl_ir_expr_op
HLSL_OP1_BIT_NOT,
HLSL_OP1_CAST,
HLSL_OP1_CEIL,
+ HLSL_OP1_CLZ,
HLSL_OP1_COS,
HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi], writes to .x */
+ HLSL_OP1_COUNTBITS,
+ HLSL_OP1_CTZ,
HLSL_OP1_DSX,
HLSL_OP1_DSX_COARSE,
HLSL_OP1_DSX_FINE,
@@ -729,6 +732,7 @@ enum hlsl_ir_expr_op
HLSL_OP1_EXP2,
HLSL_OP1_F16TOF32,
HLSL_OP1_F32TOF16,
+ HLSL_OP1_FIND_MSB,
HLSL_OP1_FLOOR,
HLSL_OP1_FRACT,
HLSL_OP1_ISINF,
@@ -1791,10 +1795,10 @@ bool hlsl_type_is_integer(const struct hlsl_type *type);
bool hlsl_type_is_floating_point(const struct hlsl_type *type);
bool hlsl_type_is_row_major(const struct hlsl_type *type);
bool hlsl_type_is_signed_integer(const struct hlsl_type *type);
+bool hlsl_type_is_unsigned_integer(const struct hlsl_type *type);
unsigned int hlsl_type_minor_size(const struct hlsl_type *type);
unsigned int hlsl_type_major_size(const struct hlsl_type *type);
unsigned int hlsl_type_element_count(const struct hlsl_type *type);
-bool hlsl_type_is_integer(const struct hlsl_type *type);
bool hlsl_type_is_minimum_precision(const struct hlsl_type *type);
bool hlsl_type_is_resource(const struct hlsl_type *type);
bool hlsl_type_is_shader(const struct hlsl_type *type);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
index d83ad9fe7d8..d3004d7cc8a 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
@@ -3181,6 +3181,20 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx,
return true;
}
+/* Convert every argument to the "int" variant of their common elementwise type. */
+static bool elementwise_intrinsic_int_convert_args(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_type *common = elementwise_intrinsic_get_common_type(ctx, params, loc);
+
+    if (!common)
+        return false;
+
+    convert_args(ctx, params, hlsl_get_numeric_type(ctx, common->class, HLSL_TYPE_INT,
+            common->e.numeric.dimx, common->e.numeric.dimy), loc);
+    return true;
+}
+
static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
@@ -3579,6 +3593,20 @@ static bool intrinsic_cosh(struct hlsl_ctx *ctx,
return write_cosh_or_sinh(ctx, params, loc, false);
}
+/* countbits(): HLSL_OP1_COUNTBITS on the argument after elementwise_intrinsic_uint_convert_args(). */
+static bool intrinsic_countbits(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
+
+    if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc))
+        return false;
+
+    args[0] = params->args[0];
+    return add_expr(ctx, params->instrs, HLSL_OP1_COUNTBITS, args,
+            convert_numeric_type(ctx, args[0]->data_type, HLSL_TYPE_UINT), loc);
+}
+
static bool intrinsic_cross(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
@@ -3925,6 +3953,76 @@ static bool intrinsic_f32tof16(struct hlsl_ctx *ctx,
return add_expr(ctx, params->instrs, HLSL_OP1_F32TOF16, operands, type, loc);
}
+/* firstbithigh(): emits HLSL_OP1_FIND_MSB before shader model 5.0; otherwise
+ * (0x1f ^ clz), with a HLSL_OP1_CLZ result of ~0u passed through unchanged. */
+static bool intrinsic_firstbithigh(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
+    struct hlsl_ir_node *mask, *none, *clz, *is_none, *flipped;
+    struct hlsl_type *type = params->args[0]->data_type;
+    struct hlsl_constant_value value;
+    bool converted;
+
+    if (hlsl_version_lt(ctx, 4, 0))
+        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
+                "The 'firstbithigh' intrinsic requires shader model 4.0 or higher.");
+
+    /* Unsigned integer arguments are handled as uint, everything else as int. */
+    if (hlsl_type_is_unsigned_integer(type))
+        converted = elementwise_intrinsic_uint_convert_args(ctx, params, loc);
+    else
+        converted = elementwise_intrinsic_int_convert_args(ctx, params, loc);
+    if (!converted)
+        return false;
+    type = convert_numeric_type(ctx, type, HLSL_TYPE_UINT);
+
+    args[0] = params->args[0];
+    if (hlsl_version_lt(ctx, 5, 0))
+        return add_expr(ctx, params->instrs, HLSL_OP1_FIND_MSB, args, type, loc);
+
+    value.u[0].u = 0x1f;
+    if (!(mask = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &value, loc)))
+        return false;
+    hlsl_block_add_instr(params->instrs, mask);
+
+    if (!(clz = add_expr(ctx, params->instrs, HLSL_OP1_CLZ, args, type, loc)))
+        return false;
+    if (!(flipped = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_BIT_XOR, mask, clz, loc)))
+        return false;
+
+    value.u[0].i = -1;
+    if (!(none = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &value, loc)))
+        return false;
+    hlsl_block_add_instr(params->instrs, none);
+
+    if (!(is_none = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_EQUAL, clz, none, loc)))
+        return false;
+
+    args[0] = is_none;
+    args[1] = add_implicit_conversion(ctx, params->instrs, none, type, loc);
+    args[2] = flipped;
+    return add_expr(ctx, params->instrs, HLSL_OP3_TERNARY, args, type, loc);
+}
+
+/* firstbitlow(): HLSL_OP1_CTZ on the argument after elementwise_intrinsic_uint_convert_args(). */
+static bool intrinsic_firstbitlow(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
+
+    if (hlsl_version_lt(ctx, 4, 0))
+        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
+                "The 'firstbitlow' intrinsic requires shader model 4.0 or higher.");
+
+    if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc))
+        return false;
+
+    args[0] = params->args[0];
+    return add_expr(ctx, params->instrs, HLSL_OP1_CTZ, args,
+            convert_numeric_type(ctx, args[0]->data_type, HLSL_TYPE_UINT), loc);
+}
+
static bool intrinsic_floor(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
@@ -5317,6 +5415,7 @@ intrinsic_functions[] =
{"clip", 1, true, intrinsic_clip},
{"cos", 1, true, intrinsic_cos},
{"cosh", 1, true, intrinsic_cosh},
+ {"countbits", 1, true, intrinsic_countbits},
{"cross", 2, true, intrinsic_cross},
{"ddx", 1, true, intrinsic_ddx},
{"ddx_coarse", 1, true, intrinsic_ddx_coarse},
@@ -5334,6 +5433,8 @@ intrinsic_functions[] =
{"f16tof32", 1, true, intrinsic_f16tof32},
{"f32tof16", 1, true, intrinsic_f32tof16},
{"faceforward", 3, true, intrinsic_faceforward},
+ {"firstbithigh", 1, true, intrinsic_firstbithigh},
+ {"firstbitlow", 1, true, intrinsic_firstbitlow},
{"floor", 1, true, intrinsic_floor},
{"fmod", 2, true, intrinsic_fmod},
{"frac", 1, true, intrinsic_frac},
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index bb8fdb8bd60..2138c3776a7 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -11057,11 +11057,32 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_ROUND_PI, 0, 0, true);
return true;
+ case HLSL_OP1_CLZ:
+ VKD3D_ASSERT(hlsl_type_is_integer(dst_type));
+ VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
+ if (hlsl_type_is_signed_integer(src_type))
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_FIRSTBIT_SHI, 0, 0, true);
+ else
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_FIRSTBIT_HI, 0, 0, true);
+ return true;
+
case HLSL_OP1_COS:
VKD3D_ASSERT(type_is_float(dst_type));
sm4_generate_vsir_expr_with_two_destinations(ctx, program, VSIR_OP_SINCOS, expr, 1);
return true;
+ case HLSL_OP1_COUNTBITS:
+ VKD3D_ASSERT(hlsl_type_is_integer(dst_type));
+ VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_COUNTBITS, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_CTZ:
+ VKD3D_ASSERT(hlsl_type_is_integer(dst_type));
+ VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_FIRSTBIT_LO, 0, 0, true);
+ return true;
+
case HLSL_OP1_DSX:
VKD3D_ASSERT(type_is_float(dst_type));
generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_DSX, 0, 0, true);
@@ -14097,6 +14118,102 @@ static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc
hlsl_transform_ir(ctx, resolve_loops, block, NULL);
}
+/* Lowering pass for HLSL_OP1_COUNTBITS: compiles the HLSL helper below for the
+ * operand's component count, then appends a store of the operand, a call to
+ * the helper and a load of its return value to "block". */
+static bool lower_countbits(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
+{
+    struct hlsl_ir_function_decl *func;
+    struct hlsl_ir_node *call, *arg;
+    struct hlsl_ir_expr *expr;
+    char *source;
+
+    /* Like vkd3d_popcount(). */
+    static const char template[] =
+            "typedef uint%u uintX;\n"
+            "uintX countbits(uintX v)\n"
+            "{\n"
+            " v -= (v >> 1) & 0x55555555;\n"
+            " v = (v & 0x33333333) + ((v >> 2) & 0x33333333);\n"
+            " return (((v + (v >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24;\n"
+            "}\n";
+
+    if (node->type != HLSL_IR_EXPR || (expr = hlsl_ir_expr(node))->op != HLSL_OP1_COUNTBITS)
+        return false;
+    arg = expr->operands[0].node;
+
+    source = hlsl_sprintf_alloc(ctx, template, hlsl_type_component_count(arg->data_type));
+    if (!source)
+        return false;
+    func = hlsl_compile_internal_function(ctx, "countbits", source);
+    vkd3d_free(source);
+    if (!func)
+        return false;
+
+    /* The operand is passed in through the helper's first parameter. */
+    hlsl_block_add_simple_store(ctx, block, func->parameters.vars[0], arg);
+
+    call = hlsl_new_call(ctx, func, &node->loc);
+    if (!call)
+        return false;
+    hlsl_block_add_instr(block, call);
+
+    hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc);
+
+    return true;
+}
+
+/* Lowering pass for HLSL_OP1_CTZ: compiles the HLSL helper below for the
+ * operand's component count, then appends a store of the operand, a call to
+ * the helper and a load of its return value to "block". */
+static bool lower_ctz(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
+{
+    struct hlsl_ir_function_decl *func;
+    struct hlsl_ir_node *call, *arg;
+    struct hlsl_ir_expr *expr;
+    char *source;
+
+    /* ctz() yields the bit number of the least significant 1-bit, counting
+     * from the least significant bit. The helper returns -1 for a zero input. */
+    static const char template[] =
+            "typedef uint%u uintX;\n"
+            "uintX ctz(uintX v)\n"
+            "{\n"
+            " uintX c = 31;\n"
+            " v &= -v;\n"
+            " c = (v & 0x0000ffff) ? c - 16 : c;\n"
+            " c = (v & 0x00ff00ff) ? c - 8 : c;\n"
+            " c = (v & 0x0f0f0f0f) ? c - 4 : c;\n"
+            " c = (v & 0x33333333) ? c - 2 : c;\n"
+            " c = (v & 0x55555555) ? c - 1 : c;\n"
+            " return v ? c : -1;\n"
+            "}\n";
+
+    if (node->type != HLSL_IR_EXPR || (expr = hlsl_ir_expr(node))->op != HLSL_OP1_CTZ)
+        return false;
+    arg = expr->operands[0].node;
+
+    source = hlsl_sprintf_alloc(ctx, template, hlsl_type_component_count(arg->data_type));
+    if (!source)
+        return false;
+    func = hlsl_compile_internal_function(ctx, "ctz", source);
+    vkd3d_free(source);
+    if (!func)
+        return false;
+
+    /* The operand is passed in through the helper's first parameter. */
+    hlsl_block_add_simple_store(ctx, block, func->parameters.vars[0], arg);
+
+    call = hlsl_new_call(ctx, func, &node->loc);
+    if (!call)
+        return false;
+    hlsl_block_add_instr(block, call);
+
+    hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc);
+
+    return true;
+}
+
static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
{
struct hlsl_ir_function_decl *func;
@@ -14239,6 +14356,69 @@ static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru
return true;
}
+/* Lowering pass for HLSL_OP1_FIND_MSB: compiles the HLSL helper below for the
+ * operand's type, then appends a store of the operand, a call to the helper
+ * and a load of its return value to "block". */
+static bool lower_find_msb(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
+{
+    struct hlsl_ir_function_decl *func;
+    struct hlsl_ir_node *call, *arg;
+    struct hlsl_ir_expr *expr;
+    char *source;
+
+    /* For non-negative inputs, find_msb() yields the bit number of the most
+     * significant 1-bit; for negative inputs, that of the most significant
+     * 0-bit. Bit numbers count from the least significant bit. The helper
+     * returns -1 if there is no such bit. */
+    static const char template[] =
+            "typedef %s intX;\n"
+            "uint%u find_msb(intX v)\n"
+            "{\n"
+            " intX c, mask;\n"
+            " v = v < 0 ? ~v : v;\n"
+            " mask = v & 0xffff0000;\n"
+            " v = mask ? mask : v;\n"
+            " c = mask ? 16 : v ? 0 : -1;\n"
+            " mask = v & 0xff00ff00;\n"
+            " v = mask ? mask : v;\n"
+            " c = mask ? c + 8 : c;\n"
+            " mask = v & 0xf0f0f0f0;\n"
+            " v = mask ? mask : v;\n"
+            " c = mask ? c + 4 : c;\n"
+            " mask = v & 0xcccccccc;\n"
+            " v = mask ? mask : v;\n"
+            " c = mask ? c + 2 : c;\n"
+            " mask = v & 0xaaaaaaaa;\n"
+            " v = mask ? mask : v;\n"
+            " c = mask ? c + 1 : c;\n"
+            " return c;\n"
+            "}\n";
+
+    if (node->type != HLSL_IR_EXPR || (expr = hlsl_ir_expr(node))->op != HLSL_OP1_FIND_MSB)
+        return false;
+    arg = expr->operands[0].node;
+
+    source = hlsl_sprintf_alloc(ctx, template, arg->data_type->name,
+            hlsl_type_component_count(arg->data_type));
+    if (!source)
+        return false;
+    func = hlsl_compile_internal_function(ctx, "find_msb", source);
+    vkd3d_free(source);
+    if (!func)
+        return false;
+
+    hlsl_block_add_simple_store(ctx, block, func->parameters.vars[0], arg);
+
+    call = hlsl_new_call(ctx, func, &node->loc);
+    if (!call)
+        return false;
+    hlsl_block_add_instr(block, call);
+
+    hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc);
+
+    return true;
+}
+
static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
{
struct hlsl_ir_function_decl *func;
@@ -14355,8 +14535,11 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v
if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0))
{
+ lower_ir(ctx, lower_countbits, body);
+ lower_ir(ctx, lower_ctz, body);
lower_ir(ctx, lower_f16tof32, body);
lower_ir(ctx, lower_f32tof16, body);
+ lower_ir(ctx, lower_find_msb, body);
}
lower_ir(ctx, lower_isinf, body);
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
index 252ed51a4e4..7b3b0470d5d 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c
@@ -250,6 +250,35 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
return true;
}
+/* Fold HLSL_OP1_CLZ: vkd3d_log2i(v) ^ 0x1f, or ~0u for zero; negative ints are complemented first. */
+static bool fold_clz(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
+        const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src)
+{
+    enum hlsl_base_type type = src->node.data_type->e.numeric.type;
+    unsigned int k, v;
+
+    for (k = 0; k < dst_type->e.numeric.dimx; ++k)
+    {
+        switch (type)
+        {
+            case HLSL_TYPE_INT:
+                v = src->value.u[k].i < 0 ? ~src->value.u[k].u : src->value.u[k].u;
+                break;
+
+            case HLSL_TYPE_UINT:
+                v = src->value.u[k].u;
+                break;
+
+            default:
+                FIXME("Fold 'clz' for type %s.\n", debug_hlsl_type(ctx, src->node.data_type));
+                return false;
+        }
+        dst->u[k].u = v ? vkd3d_log2i(v) ^ 0x1f : ~0u;
+    }
+
+    return true;
+}
+
static bool fold_cos(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src)
{
@@ -280,6 +309,59 @@ static bool fold_cos(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
return true;
}
+/* Fold HLSL_OP1_COUNTBITS by applying vkd3d_popcount() to each component.
+ * Only "uint" is handled; any other type is reported with a FIXME and false
+ * is returned. */
+static bool fold_countbits(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
+        const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src)
+{
+    enum hlsl_base_type base_type = dst_type->e.numeric.type;
+    unsigned int i;
+
+    VKD3D_ASSERT(base_type == src->node.data_type->e.numeric.type);
+
+    for (i = 0; i < dst_type->e.numeric.dimx; ++i)
+    {
+        if (base_type != HLSL_TYPE_UINT)
+        {
+            FIXME("Fold 'countbits' for type %s.\n", debug_hlsl_type(ctx, dst_type));
+            return false;
+        }
+
+        dst->u[i].u = vkd3d_popcount(src->value.u[i].u);
+    }
+
+    return true;
+}
+
+/* Fold HLSL_OP1_CTZ by applying vkd3d_ctz() to each non-zero component; zero
+ * components fold to ~0u instead.
+ * Only "uint" is handled; any other type is reported with a FIXME and false
+ * is returned. */
+static bool fold_ctz(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
+        const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src)
+{
+    enum hlsl_base_type base_type = dst_type->e.numeric.type;
+    unsigned int i, value;
+
+    VKD3D_ASSERT(base_type == src->node.data_type->e.numeric.type);
+
+    for (i = 0; i < dst_type->e.numeric.dimx; ++i)
+    {
+        if (base_type != HLSL_TYPE_UINT)
+        {
+            FIXME("Fold 'ctz' for type %s.\n", debug_hlsl_type(ctx, dst_type));
+            return false;
+        }
+
+        /* Zero has no set bit; vkd3d_ctz() is not called for it. */
+        value = src->value.u[i].u;
+        dst->u[i].u = value ? vkd3d_ctz(value) : ~0u;
+    }
+
+    return true;
+}
+
static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst,
const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src)
{
@@ -1403,10 +1485,22 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
success = fold_ceil(ctx, &res, instr->data_type, arg1);
break;
+ case HLSL_OP1_CLZ:
+ success = fold_clz(ctx, &res, instr->data_type, arg1);
+ break;
+
case HLSL_OP1_COS:
success = fold_cos(ctx, &res, instr->data_type, arg1);
break;
+ case HLSL_OP1_COUNTBITS:
+ success = fold_countbits(ctx, &res, instr->data_type, arg1);
+ break;
+
+ case HLSL_OP1_CTZ:
+ success = fold_ctz(ctx, &res, instr->data_type, arg1);
+ break;
+
case HLSL_OP1_EXP2:
success = fold_exp2(ctx, &res, instr->data_type, arg1);
break;
@@ -1555,6 +1649,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
+ static const struct hlsl_constant_value zero;
struct hlsl_ir_constant *const_arg = NULL;
struct hlsl_ir_node *mut_arg = NULL;
struct hlsl_ir_node *res_node;
@@ -1617,6 +1712,17 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in
res_node = &const_arg->node;
break;
+ case HLSL_OP2_LESS:
+ /* x < 0 -> false, if x is unsigned. */
+ if (!hlsl_type_is_unsigned_integer(expr->operands[0].node->data_type)
+ || expr->operands[1].node->type != HLSL_IR_CONSTANT
+ || !hlsl_constant_is_zero(hlsl_ir_constant(expr->operands[1].node)))
+ break;
+ if (!(res_node = hlsl_new_constant(ctx, instr->data_type, &zero, &instr->loc)))
+ break;
+ list_add_before(&expr->node.entry, &res_node->entry);
+ break;
+
default:
break;
}
diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c
index a242f32d824..d765b5acccd 100644
--- a/libs/vkd3d/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c
@@ -9885,6 +9885,48 @@ static void vsir_validate_io_register(struct validation_context *ctx, const stru
reg->idx[control_point_index].offset, control_point_count, reg->type);
}
+/* Validate a TEXTURE (t#) register operand. Reports an error if it is used
+ * outside a pixel shader, in version 3.0 or later, with an index count other
+ * than one, with a relative address, or with an index beyond the limit for
+ * the shader version (4 up to 1.3, 6 for 1.4, 8 otherwise). */
+static void vsir_validate_texture_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg)
+{
+    const struct vkd3d_shader_version *version = &ctx->program->shader_version;
+    uint32_t idx, limit;
+
+    if (version->type != VKD3D_SHADER_TYPE_PIXEL)
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+                "TEXTURE registers cannot be used in shader type %#x.", version->type);
+
+    if (reg->idx_count != 1)
+    {
+        /* The remaining checks are skipped in this case. */
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT,
+                "Invalid index count %u for a TEXTURE register.", reg->idx_count);
+        return;
+    }
+
+    if (reg->idx[0].rel_addr)
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+                "Non-NULL relative address for a TEXTURE register.");
+
+    if (version->major >= 3)
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+                "TEXTURE registers cannot be used in version %u.%u.", version->major, version->minor);
+
+    if (vkd3d_shader_ver_le(version, 1, 3))
+        limit = 4;
+    else if (vkd3d_shader_ver_le(version, 1, 4))
+        limit = 6;
+    else
+        limit = 8;
+
+    idx = reg->idx[0].offset;
+    if (idx >= limit)
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
+                "Register t%u exceeds limits for version %u.%u.", idx, version->major, version->minor);
+}
+
static void vsir_validate_temp_register(struct validation_context *ctx,
const struct vkd3d_shader_register *reg)
{
@@ -10354,6 +10385,10 @@ static void vsir_validate_register(struct validation_context *ctx,
vsir_validate_register_without_indices(ctx, reg);
break;
+ case VKD3DSPR_TEXTURE:
+ vsir_validate_texture_register(ctx, reg);
+ break;
+
case VKD3DSPR_SSA:
vsir_validate_ssa_register(ctx, reg);
break;
@@ -10402,6 +10437,8 @@ static void vsir_validate_io_dst_param(struct validation_context *ctx,
static void vsir_validate_dst_param(struct validation_context *ctx,
const struct vkd3d_shader_dst_param *dst)
{
+ const struct vkd3d_shader_version *version = &ctx->program->shader_version;
+
vsir_validate_register(ctx, &dst->reg);
if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL)
@@ -10531,6 +10568,13 @@ static void vsir_validate_dst_param(struct validation_context *ctx,
vsir_validate_io_dst_param(ctx, dst);
break;
+        case VKD3DSPR_TEXTURE:
+            /* Writes to texture registers are rejected from version 1.4 onwards. */
+            if (vkd3d_shader_ver_ge(version, 1, 4))
+                validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
+                        "Texture registers cannot be written in version %u.%u.", version->major, version->minor);
+            break;
+
default:
break;
}
@@ -11511,6 +11553,27 @@ static void vsir_validate_shift_operation(struct validation_context *ctx,
vsir_opcode_get_name(instruction->opcode, "<unknown>"), instruction->opcode);
}
+/* Validate BEM: pixel shader version 1.4 only, destination write mask 0x3, float operands. */
+static void vsir_validate_bem(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
+{
+    const struct vkd3d_shader_version *version = &ctx->program->shader_version;
+
+    if (version->type != VKD3D_SHADER_TYPE_PIXEL)
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_OPCODE,
+                "BEM cannot be used in shader type %#x.", version->type);
+
+    if (version->major != 1 || version->minor != 4)
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_OPCODE,
+                "BEM cannot be used in version %u.%u.", version->major, version->minor);
+
+    if (instruction->dst[0].write_mask != 0x3)
+        validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK,
+                "Invalid BEM write mask %#x.", instruction->dst[0].write_mask);
+
+    /* Not strictly elementwise, but all arguments are expected to be float. */
+    vsir_validate_float_elementwise_operation(ctx, instruction);
+}
+
static void vsir_validate_branch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
{
size_t i;
@@ -12353,6 +12416,7 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[
[VSIR_OP_AND] = {1, 2, vsir_validate_logic_elementwise_operation},
[VSIR_OP_ASIN] = {1, 1, vsir_validate_float_elementwise_operation},
[VSIR_OP_ATAN] = {1, 1, vsir_validate_float_elementwise_operation},
+ [VSIR_OP_BEM] = {1, 2, vsir_validate_bem},
[VSIR_OP_BRANCH] = {0, ~0u, vsir_validate_branch},
[VSIR_OP_DADD] = {1, 2, vsir_validate_double_elementwise_operation},
[VSIR_OP_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances},
diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c
index 0fdeba75c58..434be05e786 100644
--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c
+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c
@@ -3254,6 +3254,14 @@ static struct spirv_compiler *spirv_compiler_create(struct vsir_program *program
compiler->features = option->value;
break;
+ case VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER:
+ case VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY:
+ case VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT:
+ case VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION:
+ case VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS:
+ /* Explicitly ignored for this target. */
+ break;
+
default:
WARN("Ignoring unrecognised option %#x with value %#x.\n", option->name, option->value);
break;
@@ -8347,8 +8355,8 @@ static void spirv_compiler_emit_f32tof16(struct spirv_compiler *compiler,
unsigned int i, j;
instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder);
- type_id = spirv_get_type_id_for_component_type(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2);
- scalar_type_id = spirv_get_type_id_for_component_type(builder, VKD3D_SHADER_COMPONENT_UINT, 1);
+ type_id = spirv_get_type_id(builder, VSIR_DATA_F32, 2);
+ scalar_type_id = spirv_get_type_id(builder, VSIR_DATA_U32, 1);
zero_id = spirv_compiler_get_constant_float(compiler, 0.0f);
/* FIXME: Consider a single PackHalf2x16 instruction per 2 components. */
@@ -8427,7 +8435,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co
src0_id = spirv_compiler_emit_load_src(compiler, &src[0], write_mask);
src1_id = spirv_compiler_emit_load_src(compiler, &src[1], write_mask);
- type_id = spirv_get_type_id_for_component_type(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count);
+ type_id = spirv_get_type_id(builder, VSIR_DATA_BOOL, component_count);
result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream,
op, type_id, src0_id, src1_id);
@@ -8479,7 +8487,7 @@ static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compil
src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask);
src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask);
- type_id = spirv_get_type_id_for_component_type(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count);
+ type_id = spirv_get_type_id(builder, VSIR_DATA_BOOL, component_count);
result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, src0_id, src1_id);
result_id = spirv_compiler_emit_bool_to_float(compiler, component_count, result_id, false);
@@ -9054,7 +9062,7 @@ static void spirv_compiler_emit_lod(struct spirv_compiler *compiler,
spirv_compiler_prepare_image(compiler, &image,
&resource->reg, &sampler->reg, VKD3D_IMAGE_FLAG_SAMPLED);
- type_id = spirv_get_type_id_for_component_type(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2);
+ type_id = spirv_get_type_id(builder, VSIR_DATA_F32, 2);
coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL);
val_id = vkd3d_spirv_build_op_image_query_lod(builder,
type_id, image.sampled_image_id, coordinate_id);
@@ -9388,7 +9396,7 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler,
if (!spirv_compiler_get_register_info(compiler, &resource->reg, &reg_info))
return;
- type_id = spirv_get_type_id_for_component_type(builder, VKD3D_SHADER_COMPONENT_UINT, 1);
+ type_id = spirv_get_type_id(builder, VSIR_DATA_U32, 1);
ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id);
base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler,
type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0);
diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c
index 7ff2a305cfa..8f2d3dd48ca 100644
--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c
@@ -4248,6 +4248,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
case VSIR_OP_BREAK:
case VSIR_OP_CASE:
case VSIR_OP_CONTINUE:
+ case VSIR_OP_COUNTBITS:
case VSIR_OP_CUT:
case VSIR_OP_CUT_STREAM:
case VSIR_OP_DCL_STREAM:
@@ -4273,6 +4274,9 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
case VSIR_OP_EXP:
case VSIR_OP_F16TOF32:
case VSIR_OP_F32TOF16:
+ case VSIR_OP_FIRSTBIT_HI:
+ case VSIR_OP_FIRSTBIT_LO:
+ case VSIR_OP_FIRSTBIT_SHI:
case VSIR_OP_FRC:
case VSIR_OP_FTOI:
case VSIR_OP_FTOU:
--
2.51.0